From 7ad3bb527e25eb0a9d147d2e93f9dca605c75688 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 1 Nov 2022 16:39:20 +0100 Subject: [PATCH 001/516] Reapply [ValueLattice] Fix getCompare() for undef values Relative to the previous attempt, this also updates the ValueLattice unit tests. ----- Resolve the TODO about incorrect getCompare() behavior. This can be made more precise (e.g. by materializing the undef value and performing constant folding on it), but for now just return an unknown result to fix the correctness issue. This should be NFC in terms of user-visible behavior, because the only user of this method (SCCP) was already guarding against UndefValue results. --- llvm/include/llvm/Analysis/ValueLattice.h | 10 ++++++-- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 3 --- llvm/unittests/Analysis/ValueLatticeTest.cpp | 25 ++++++++++---------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index bc6b279e9ed52..7fe45d9f4dc96 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -453,8 +453,14 @@ class ValueLatticeElement { /// evaluated. Constant *getCompare(CmpInst::Predicate Pred, Type *Ty, const ValueLatticeElement &Other) const { - if (isUnknownOrUndef() || Other.isUnknownOrUndef()) - return UndefValue::get(Ty); + // Not yet resolved. + if (isUnknown() || Other.isUnknown()) + return nullptr; + + // TODO: Can be made more precise, but always returning undef would be + // incorrect. 
+ if (isUndef() || isUndef()) + return nullptr; if (isConstant() && Other.isConstant()) return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index a3455577a35c5..3d467f2fd68f9 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -1044,9 +1044,6 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State); if (C) { - // TODO: getCompare() currently has incorrect handling for unknown/undef. - if (isa(C)) - return; ValueLatticeElement CV; CV.markConstant(C); mergeInValue(&I, CV); diff --git a/llvm/unittests/Analysis/ValueLatticeTest.cpp b/llvm/unittests/Analysis/ValueLatticeTest.cpp index b0b4b5e7bdc1d..5f5f249a7c7d3 100644 --- a/llvm/unittests/Analysis/ValueLatticeTest.cpp +++ b/llvm/unittests/Analysis/ValueLatticeTest.cpp @@ -173,24 +173,25 @@ TEST_F(ValueLatticeTest, getCompareUndef) { auto *I32Ty = IntegerType::get(Context, 32); auto *I1Ty = IntegerType::get(Context, 1); + // TODO: These results can be improved. 
auto LV1 = ValueLatticeElement::get(UndefValue::get(I32Ty)); auto LV2 = ValueLatticeElement::getRange({APInt(32, 10, true), APInt(32, 20, true)}); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2))); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2), nullptr); auto *FloatTy = IntegerType::getFloatTy(Context); auto LV3 = ValueLatticeElement::get(ConstantFP::get(FloatTy, 1.0)); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV3))); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV3), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV3), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV3), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV3), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV3), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV3), nullptr); } } // end anonymous namespace From 9d074fd37066b133cab77322633717268cc3f2f9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: 
Wed, 2 Nov 2022 10:30:04 +0100 Subject: [PATCH 002/516] [SCCP] Add test for icmp that requires DL to fold (NFC) --- llvm/test/Transforms/SCCP/conditions-ranges.ll | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llvm/test/Transforms/SCCP/conditions-ranges.ll b/llvm/test/Transforms/SCCP/conditions-ranges.ll index 3d4b529dbc79e..644b0f6994e73 100644 --- a/llvm/test/Transforms/SCCP/conditions-ranges.ll +++ b/llvm/test/Transforms/SCCP/conditions-ranges.ll @@ -1369,3 +1369,12 @@ bb139: ; preds = %bb135 bb142: ; preds = %bb139 ret void } + +define i1 @ptr_icmp_data_layout() { +; CHECK-LABEL: @ptr_icmp_data_layout( +; CHECK-NEXT: ret i1 icmp eq (ptr getelementptr inbounds (i32, ptr @A, i64 1), ptr @A) +; + %a.end = getelementptr i32, ptr @A, i64 1 + %cmp = icmp eq ptr %a.end, @A + ret i1 %cmp +} From 28b31d9ccc2420f4cb6fdaeb5630e63f502b810c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 10:33:44 +0100 Subject: [PATCH 003/516] [ValueLattice] Move getCompare() out of line (NFC) This is a fairly large method that is unlikely to benefit from inlining. --- llvm/include/llvm/Analysis/ValueLattice.h | 38 +-------------------- llvm/lib/Analysis/ValueLattice.cpp | 40 +++++++++++++++++++++++ 2 files changed, 41 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 7fe45d9f4dc96..50419f32702b4 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -452,43 +452,7 @@ class ValueLatticeElement { /// true, false or undef constants, or nullptr if the comparison cannot be /// evaluated. Constant *getCompare(CmpInst::Predicate Pred, Type *Ty, - const ValueLatticeElement &Other) const { - // Not yet resolved. - if (isUnknown() || Other.isUnknown()) - return nullptr; - - // TODO: Can be made more precise, but always returning undef would be - // incorrect. 
- if (isUndef() || isUndef()) - return nullptr; - - if (isConstant() && Other.isConstant()) - return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); - - if (ICmpInst::isEquality(Pred)) { - // not(C) != C => true, not(C) == C => false. - if ((isNotConstant() && Other.isConstant() && - getNotConstant() == Other.getConstant()) || - (isConstant() && Other.isNotConstant() && - getConstant() == Other.getNotConstant())) - return Pred == ICmpInst::ICMP_NE - ? ConstantInt::getTrue(Ty) : ConstantInt::getFalse(Ty); - } - - // Integer constants are represented as ConstantRanges with single - // elements. - if (!isConstantRange() || !Other.isConstantRange()) - return nullptr; - - const auto &CR = getConstantRange(); - const auto &OtherCR = Other.getConstantRange(); - if (CR.icmp(Pred, OtherCR)) - return ConstantInt::getTrue(Ty); - if (CR.icmp(CmpInst::getInversePredicate(Pred), OtherCR)) - return ConstantInt::getFalse(Ty); - - return nullptr; - } + const ValueLatticeElement &Other) const; unsigned getNumRangeExtensions() const { return NumRangeExtensions; } void setNumRangeExtensions(unsigned N) { NumRangeExtensions = N; } diff --git a/llvm/lib/Analysis/ValueLattice.cpp b/llvm/lib/Analysis/ValueLattice.cpp index 627166e2409d3..a1c0286495c5b 100644 --- a/llvm/lib/Analysis/ValueLattice.cpp +++ b/llvm/lib/Analysis/ValueLattice.cpp @@ -9,6 +9,46 @@ #include "llvm/Analysis/ValueLattice.h" namespace llvm { +Constant * +ValueLatticeElement::getCompare(CmpInst::Predicate Pred, Type *Ty, + const ValueLatticeElement &Other) const { + // Not yet resolved. + if (isUnknown() || Other.isUnknown()) + return nullptr; + + // TODO: Can be made more precise, but always returning undef would be + // incorrect. + if (isUndef() || Other.isUndef()) + return nullptr; + + if (isConstant() && Other.isConstant()) + return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); + + if (ICmpInst::isEquality(Pred)) { + // not(C) != C => true, not(C) == C => false. 
+ if ((isNotConstant() && Other.isConstant() && + getNotConstant() == Other.getConstant()) || + (isConstant() && Other.isNotConstant() && + getConstant() == Other.getNotConstant())) + return Pred == ICmpInst::ICMP_NE ? ConstantInt::getTrue(Ty) + : ConstantInt::getFalse(Ty); + } + + // Integer constants are represented as ConstantRanges with single + // elements. + if (!isConstantRange() || !Other.isConstantRange()) + return nullptr; + + const auto &CR = getConstantRange(); + const auto &OtherCR = Other.getConstantRange(); + if (CR.icmp(Pred, OtherCR)) + return ConstantInt::getTrue(Ty); + if (CR.icmp(CmpInst::getInversePredicate(Pred), OtherCR)) + return ConstantInt::getFalse(Ty); + + return nullptr; +} + raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val) { if (Val.isUnknown()) return OS << "unknown"; From 134bda4b61ac9f5fd0583989771be8736e3dbf83 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 10:41:11 +0100 Subject: [PATCH 004/516] [ValueLattice] Use DL-aware folding in getCompare() Use DL-aware ConstantFoldCompareInstOperands() API instead of ConstantExpr API. The practical effect of this is that SCCP can now fold comparisons that require DL. --- llvm/include/llvm/Analysis/ValueLattice.h | 3 +- llvm/lib/Analysis/ValueLattice.cpp | 7 +- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 2 +- .../test/Transforms/SCCP/conditions-ranges.ll | 2 +- llvm/unittests/Analysis/ValueLatticeTest.cpp | 97 ++++++++++--------- 5 files changed, 58 insertions(+), 53 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 50419f32702b4..8bf6b2a095f6c 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -452,7 +452,8 @@ class ValueLatticeElement { /// true, false or undef constants, or nullptr if the comparison cannot be /// evaluated. 
Constant *getCompare(CmpInst::Predicate Pred, Type *Ty, - const ValueLatticeElement &Other) const; + const ValueLatticeElement &Other, + const DataLayout &DL) const; unsigned getNumRangeExtensions() const { return NumRangeExtensions; } void setNumRangeExtensions(unsigned N) { NumRangeExtensions = N; } diff --git a/llvm/lib/Analysis/ValueLattice.cpp b/llvm/lib/Analysis/ValueLattice.cpp index a1c0286495c5b..aec7b9950c604 100644 --- a/llvm/lib/Analysis/ValueLattice.cpp +++ b/llvm/lib/Analysis/ValueLattice.cpp @@ -7,11 +7,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueLattice.h" +#include "llvm/Analysis/ConstantFolding.h" namespace llvm { Constant * ValueLatticeElement::getCompare(CmpInst::Predicate Pred, Type *Ty, - const ValueLatticeElement &Other) const { + const ValueLatticeElement &Other, + const DataLayout &DL) const { // Not yet resolved. if (isUnknown() || Other.isUnknown()) return nullptr; @@ -22,7 +24,8 @@ ValueLatticeElement::getCompare(CmpInst::Predicate Pred, Type *Ty, return nullptr; if (isConstant() && Other.isConstant()) - return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); + return ConstantFoldCompareInstOperands(Pred, getConstant(), + Other.getConstant(), DL); if (ICmpInst::isEquality(Pred)) { // not(C) != C => true, not(C) == C => false. 
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 3d467f2fd68f9..79d4ab9803b61 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -1042,7 +1042,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { auto V1State = getValueState(Op1); auto V2State = getValueState(Op2); - Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State); + Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL); if (C) { ValueLatticeElement CV; CV.markConstant(C); diff --git a/llvm/test/Transforms/SCCP/conditions-ranges.ll b/llvm/test/Transforms/SCCP/conditions-ranges.ll index 644b0f6994e73..e108d63e9091f 100644 --- a/llvm/test/Transforms/SCCP/conditions-ranges.ll +++ b/llvm/test/Transforms/SCCP/conditions-ranges.ll @@ -1372,7 +1372,7 @@ bb142: ; preds = %bb139 define i1 @ptr_icmp_data_layout() { ; CHECK-LABEL: @ptr_icmp_data_layout( -; CHECK-NEXT: ret i1 icmp eq (ptr getelementptr inbounds (i32, ptr @A, i64 1), ptr @A) +; CHECK-NEXT: ret i1 false ; %a.end = getelementptr i32, ptr @A, i64 1 %cmp = icmp eq ptr %a.end, @A diff --git a/llvm/unittests/Analysis/ValueLatticeTest.cpp b/llvm/unittests/Analysis/ValueLatticeTest.cpp index 5f5f249a7c7d3..ae221811f3fb5 100644 --- a/llvm/unittests/Analysis/ValueLatticeTest.cpp +++ b/llvm/unittests/Analysis/ValueLatticeTest.cpp @@ -23,6 +23,7 @@ namespace { class ValueLatticeTest : public testing::Test { protected: LLVMContext Context; + DataLayout DL = DataLayout(""); }; TEST_F(ValueLatticeTest, ValueLatticeGetters) { @@ -106,42 +107,42 @@ TEST_F(ValueLatticeTest, getCompareIntegers) { auto LV1 = ValueLatticeElement::get(C1); // Check getCompare for equal integer constants. 
- EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV1)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV1)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV1)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV1)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV1)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV1)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV1, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV1, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV1, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV1, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV1, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV1, DL)->isZeroValue()); auto LV2 = ValueLatticeElement::getRange({APInt(32, 10, true), APInt(32, 20, true)}); // Check getCompare with distinct integer ranges. 
- EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2, DL)->isZeroValue()); auto LV3 = ValueLatticeElement::getRange({APInt(32, 15, true), APInt(32, 19, true)}); // Check getCompare with a subset integer ranges. 
- EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLT, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_NE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_EQ, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGT, I1Ty, LV3), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLT, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_NE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_EQ, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGT, I1Ty, LV3, DL), nullptr); auto LV4 = ValueLatticeElement::getRange({APInt(32, 15, true), APInt(32, 25, true)}); // Check getCompare with overlapping integer ranges. - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLT, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLE, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_NE, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_EQ, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGE, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGT, I1Ty, LV4), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLT, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLE, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_NE, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_EQ, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGE, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGT, I1Ty, LV4, DL), nullptr); } TEST_F(ValueLatticeTest, getCompareFloat) { @@ -152,21 +153,21 @@ TEST_F(ValueLatticeTest, getCompareFloat) { auto LV2 = ValueLatticeElement::get(C1); // Check getCompare for equal 
floating point constants. - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2, DL)->isZeroValue()); EXPECT_TRUE( LV1.mergeIn(ValueLatticeElement::get(ConstantFP::get(FloatTy, 2.2)))); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2, DL), nullptr); } TEST_F(ValueLatticeTest, getCompareUndef) { @@ -177,21 +178,21 @@ TEST_F(ValueLatticeTest, getCompareUndef) { auto 
LV1 = ValueLatticeElement::get(UndefValue::get(I32Ty)); auto LV2 = ValueLatticeElement::getRange({APInt(32, 10, true), APInt(32, 20, true)}); - EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2, DL), nullptr); auto *FloatTy = IntegerType::getFloatTy(Context); auto LV3 = ValueLatticeElement::get(ConstantFP::get(FloatTy, 1.0)); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV3), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV3), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV3), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV3, DL), nullptr); } } // end anonymous namespace From 
a667aa4de041816cb4865bce8f523228f2332ffa Mon Sep 17 00:00:00 2001 From: Moritz Sichert Date: Wed, 20 Jan 2021 17:08:47 +0100 Subject: [PATCH 005/516] [RuntimeDyld] Added support for relocation of indirect functions In ELF, symbols of type STT_GNU_IFUNC need to be resolved by calling the function at the symbol's address. This is implemented by adding special stubs for all symbols of that type. Differential Revision: https://reviews.llvm.org/D105465 --- llvm/include/llvm/Object/ELFObjectFile.h | 3 + .../RuntimeDyld/RuntimeDyld.cpp | 23 ++- .../RuntimeDyld/RuntimeDyldELF.cpp | 165 +++++++++++++++++- .../RuntimeDyld/RuntimeDyldELF.h | 34 ++++ .../RuntimeDyld/RuntimeDyldImpl.h | 4 + .../RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s | 109 ++++++++++++ 6 files changed, 330 insertions(+), 8 deletions(-) create mode 100644 llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 98f6bea054314..f6d22e6e70be6 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -773,6 +773,9 @@ Expected ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { if (isExportedToOtherDSO(ESym)) Result |= SymbolRef::SF_Exported; + if (ESym->getType() == ELF::STT_GNU_IFUNC) + Result |= SymbolRef::SF_Indirect; + if (ESym->getVisibility() == ELF::STV_HIDDEN) Result |= SymbolRef::SF_Hidden; diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 54ab007323302..1585170144c7c 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -310,9 +310,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)Addr) << " flags: " << *FlagsOrErr << "\n"); - if (!Name.empty()) // Skip absolute symbol relocations. 
- GlobalSymbolTable[Name] = - SymbolTableEntry(SectionID, Addr, *JITSymFlags); + // Skip absolute symbol relocations. + if (!Name.empty()) { + auto Result = GlobalSymbolTable.insert_or_assign( + Name, SymbolTableEntry(SectionID, Addr, *JITSymFlags)); + processNewSymbol(*I, Result.first->getValue()); + } } else if (SymType == object::SymbolRef::ST_Function || SymType == object::SymbolRef::ST_Data || SymType == object::SymbolRef::ST_Unknown || @@ -344,9 +347,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)SectOffset) << " flags: " << *FlagsOrErr << "\n"); - if (!Name.empty()) // Skip absolute symbol relocations - GlobalSymbolTable[Name] = - SymbolTableEntry(SectionID, SectOffset, *JITSymFlags); + // Skip absolute symbol relocations. + if (!Name.empty()) { + auto Result = GlobalSymbolTable.insert_or_assign( + Name, SymbolTableEntry(SectionID, SectOffset, *JITSymFlags)); + processNewSymbol(*I, Result.first->getValue()); + } } } @@ -632,6 +638,11 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, RWDataAlign = std::max(RWDataAlign, CommonAlign); } + if (!CodeSectionSizes.empty()) { + // Add 64 bytes for a potential IFunc resolver stub + CodeSectionSizes.push_back(64); + } + // Compute the required allocation space for each different type of sections // (code, read-only data, read-write data) assuming that all sections are // allocated with the max alignment. 
Note that we cannot compute with the diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index b5a64a70a89a4..f343bec642756 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -2292,18 +2292,75 @@ RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(uint64_t GOTOffset, return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset); } +void RuntimeDyldELF::processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Symbol) { + // This should never return an error as `processNewSymbol` wouldn't have been + // called if getFlags() returned an error before. + auto ObjSymbolFlags = cantFail(ObjSymbol.getFlags()); + + if (ObjSymbolFlags & SymbolRef::SF_Indirect) { + if (IFuncStubSectionID == 0) { + // Create a dummy section for the ifunc stubs. It will be actually + // allocated in finalizeLoad() below. + IFuncStubSectionID = Sections.size(); + Sections.push_back( + SectionEntry(".text.__llvm_IFuncStubs", nullptr, 0, 0, 0)); + // First 64B are reserved for the IFunc resolver + IFuncStubOffset = 64; + } + + IFuncStubs.push_back(IFuncStub{IFuncStubOffset, Symbol}); + // Modify the symbol so that it points to the ifunc stub instead of to the + // resolver function. + Symbol = SymbolTableEntry(IFuncStubSectionID, IFuncStubOffset, + Symbol.getFlags()); + IFuncStubOffset += getMaxIFuncStubSize(); + } +} + Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { if (IsMipsO32ABI) if (!PendingRelocs.empty()) return make_error("Can't find matching LO16 reloc"); + // Create the IFunc stubs if necessary. This must be done before processing + // the GOT entries, as the IFunc stubs may create some. 
+ if (IFuncStubSectionID != 0) { + uint8_t *IFuncStubsAddr = MemMgr.allocateCodeSection( + IFuncStubOffset, 1, IFuncStubSectionID, ".text.__llvm_IFuncStubs"); + if (!IFuncStubsAddr) + return make_error( + "Unable to allocate memory for IFunc stubs!"); + Sections[IFuncStubSectionID] = + SectionEntry(".text.__llvm_IFuncStubs", IFuncStubsAddr, IFuncStubOffset, + IFuncStubOffset, 0); + + createIFuncResolver(IFuncStubsAddr); + + LLVM_DEBUG(dbgs() << "Creating IFunc stubs SectionID: " + << IFuncStubSectionID << " Addr: " + << Sections[IFuncStubSectionID].getAddress() << '\n'); + for (auto &IFuncStub : IFuncStubs) { + auto &Symbol = IFuncStub.OriginalSymbol; + LLVM_DEBUG(dbgs() << "\tSectionID: " << Symbol.getSectionID() + << " Offset: " << format("%p", Symbol.getOffset()) + << " IFuncStubOffset: " + << format("%p\n", IFuncStub.StubOffset)); + createIFuncStub(IFuncStubSectionID, 0, IFuncStub.StubOffset, + Symbol.getSectionID(), Symbol.getOffset()); + } + + IFuncStubSectionID = 0; + IFuncStubOffset = 0; + IFuncStubs.clear(); + } + // If necessary, allocate the global offset table if (GOTSectionID != 0) { // Allocate memory for the section size_t TotalSize = CurrentGOTIndex * getGOTEntrySize(); uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(), - GOTSectionID, ".got", false); + GOTSectionID, ".got", false); if (!Addr) return make_error("Unable to allocate memory for GOT!"); @@ -2326,7 +2383,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, section_iterator RelocatedSection = *RelSecOrErr; ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); - assert (i != SectionMap.end()); + assert(i != SectionMap.end()); SectionToGOTMap[i->second] = GOTSectionID; } } @@ -2362,6 +2419,110 @@ bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const { return Obj.isELF(); } +void RuntimeDyldELF::createIFuncResolver(uint8_t *Addr) const { + if (Arch == Triple::x86_64) { + // The address of the GOT1 entry is in %r11, the GOT2 
entry is in %r11+8 + // (see createIFuncStub() for details) + // The following code first saves all registers that contain the original + // function arguments as those registers are not saved by the resolver + // function. %r11 is saved as well so that the GOT2 entry can be updated + // afterwards. Then it calls the actual IFunc resolver function whose + // address is stored in GOT2. After the resolver function returns, all + // saved registers are restored and the return value is written to GOT1. + // Finally, jump to the now resolved function. + // clang-format off + const uint8_t StubCode[] = { + 0x57, // push %rdi + 0x56, // push %rsi + 0x52, // push %rdx + 0x51, // push %rcx + 0x41, 0x50, // push %r8 + 0x41, 0x51, // push %r9 + 0x41, 0x53, // push %r11 + 0x41, 0xff, 0x53, 0x08, // call *0x8(%r11) + 0x41, 0x5b, // pop %r11 + 0x41, 0x59, // pop %r9 + 0x41, 0x58, // pop %r8 + 0x59, // pop %rcx + 0x5a, // pop %rdx + 0x5e, // pop %rsi + 0x5f, // pop %rdi + 0x49, 0x89, 0x03, // mov %rax,(%r11) + 0xff, 0xe0 // jmp *%rax + }; + // clang-format on + static_assert(sizeof(StubCode) <= 64, + "maximum size of the IFunc resolver is 64B"); + memcpy(Addr, StubCode, sizeof(StubCode)); + } else { + report_fatal_error( + "IFunc resolver is not supported for target architecture"); + } +} + +void RuntimeDyldELF::createIFuncStub(unsigned IFuncStubSectionID, + uint64_t IFuncResolverOffset, + uint64_t IFuncStubOffset, + unsigned IFuncSectionID, + uint64_t IFuncOffset) { + auto &IFuncStubSection = Sections[IFuncStubSectionID]; + auto *Addr = IFuncStubSection.getAddressWithOffset(IFuncStubOffset); + + if (Arch == Triple::x86_64) { + // The first instruction loads a PC-relative address into %r11 which is a + // GOT entry for this stub. This initially contains the address to the + // IFunc resolver. We can use %r11 here as it's caller saved but not used + // to pass any arguments. In fact, x86_64 ABI even suggests using %r11 for + // code in the PLT. 
The IFunc resolver will use %r11 to update the GOT + // entry. + // + // The next instruction just jumps to the address contained in the GOT + // entry. As mentioned above, we do this two-step jump by first setting + // %r11 so that the IFunc resolver has access to it. + // + // The IFunc resolver of course also needs to know the actual address of + // the actual IFunc resolver function. This will be stored in a GOT entry + // right next to the first one for this stub. So, the IFunc resolver will + // be able to call it with %r11+8. + // + // In total, two adjacent GOT entries (+relocation) and one additional + // relocation are required: + // GOT1: Address of the IFunc resolver. + // GOT2: Address of the IFunc resolver function. + // IFuncStubOffset+3: 32-bit PC-relative address of GOT1. + uint64_t GOT1 = allocateGOTEntries(2); + uint64_t GOT2 = GOT1 + getGOTEntrySize(); + + RelocationEntry RE1(GOTSectionID, GOT1, ELF::R_X86_64_64, + IFuncResolverOffset, {}); + addRelocationForSection(RE1, IFuncStubSectionID); + RelocationEntry RE2(GOTSectionID, GOT2, ELF::R_X86_64_64, IFuncOffset, {}); + addRelocationForSection(RE2, IFuncSectionID); + + const uint8_t StubCode[] = { + 0x4c, 0x8d, 0x1d, 0x00, 0x00, 0x00, 0x00, // leaq 0x0(%rip),%r11 + 0x41, 0xff, 0x23 // jmpq *(%r11) + }; + assert(sizeof(StubCode) <= getMaxIFuncStubSize() && + "IFunc stub size must not exceed getMaxIFuncStubSize()"); + memcpy(Addr, StubCode, sizeof(StubCode)); + + // The PC-relative value starts 4 bytes from the end of the leaq + // instruction, so the addend is -4. 
+ resolveGOTOffsetRelocation(IFuncStubSectionID, IFuncStubOffset + 3, + GOT1 - 4, ELF::R_X86_64_PC32); + } else { + report_fatal_error("IFunc stub is not supported for target architecture"); + } +} + +unsigned RuntimeDyldELF::getMaxIFuncStubSize() const { + if (Arch == Triple::x86_64) { + return 10; + } + return 0; +} + bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { unsigned RelTy = R.getType(); if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 1251036f4caa8..fbd81e4f63bf4 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -158,6 +158,40 @@ class RuntimeDyldELF : public RuntimeDyldImpl { // Map between GOT relocation value and corresponding GOT offset std::map GOTOffsetMap; + /// The ID of the current IFunc stub section + unsigned IFuncStubSectionID = 0; + /// The current offset into the IFunc stub section + uint64_t IFuncStubOffset = 0; + + /// A IFunc stub and its original symbol + struct IFuncStub { + /// The offset of this stub in the IFunc stub section + uint64_t StubOffset; + /// The symbol table entry of the original symbol + SymbolTableEntry OriginalSymbol; + }; + + /// The IFunc stubs + SmallVector IFuncStubs; + + /// Create the code for the IFunc resolver at the given address. This code + /// works together with the stubs created in createIFuncStub() to call the + /// resolver function and then jump to the real function address. + /// It must not be larger than 64B. + void createIFuncResolver(uint8_t *Addr) const; + /// Create the code for an IFunc stub for the IFunc that is defined in + /// section IFuncSectionID at offset IFuncOffset. The IFunc resolver created + /// by createIFuncResolver() is defined in the section IFuncStubSectionID at + /// offset IFuncResolverOffset. 
The code should be written into the section + /// with the id IFuncStubSectionID at the offset IFuncStubOffset. + void createIFuncStub(unsigned IFuncStubSectionID, + uint64_t IFuncResolverOffset, uint64_t IFuncStubOffset, + unsigned IFuncSectionID, uint64_t IFuncOffset); + /// Return the maximum size of a stub created by createIFuncStub() + unsigned getMaxIFuncStubSize() const; + + void processNewSymbol(const SymbolRef &ObjSymbol, + SymbolTableEntry &Entry) override; bool relocationNeedsGot(const RelocationRef &R) const override; bool relocationNeedsStub(const RelocationRef &R) const override; diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 3940e6ea5b057..0d7ba4d822182 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -435,6 +435,10 @@ class RuntimeDyldImpl { // Return size of Global Offset Table (GOT) entry virtual size_t getGOTEntrySize() { return 0; } + // Hook for the subclasses to do further processing when a symbol is added to + // the global symbol table. This function may modify the symbol table entry. + virtual void processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Entry) {} + // Return true if the relocation R may require allocating a GOT entry. 
virtual bool relocationNeedsGot(const RelocationRef &R) const { return false; diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s new file mode 100644 index 0000000000000..ed8eb8833ded6 --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s @@ -0,0 +1,109 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: split-file %s %t +# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/test_runner.o %t/test_runner.s +# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/func_defs.o %t/func_defs.s +# RUN: llvm-rtdyld -triple=x86_64-unknown-linux-gnu -verify -check=%s %t/test_runner.o %t/func_defs.o +# RUN: llvm-rtdyld -triple=x86_64-unknown-linux-gnu -execute %t/test_runner.o %t/func_defs.o + +#--- test_runner.s + +# The _main function of this file contains calls to the two external functions +# "indirect_func" and "normal_func" that are not yet defined. They are called via +# the PLT to simulate how a compiler would emit a call to an external function. +# Eventually, indirect_func will resolve to a STT_GNU_IFUNC and normal_func to a +# regular function. We include calls to both types of functions in this test to +# test that both types of functions are executed correctly when their types are +# not known initially. +# It also contains a call to a locally defined indirect function. As RuntimeDyld +# treats local functions a bit differently than external functions, we also test +# that. +# Verify that the functions return the expected value. If the external indirect +# function call fails, this returns the error code 1. If the external normal +# function call fails, it's the error code 2. If the call to the locally +# defined indirect function fails, return the error code 3.
+ +local_real_func: + mov $0x56, %eax + ret + +local_indirect_func_resolver: + lea local_real_func(%rip), %rax + ret + + .type local_indirect_func, @gnu_indirect_function + .set local_indirect_func, local_indirect_func_resolver + + .global _main +_main: + call indirect_func@plt + cmp $0x12, %eax + je 1f + mov $1, %eax + ret +1: + + call normal_func@plt + cmp $0x34, %eax + je 1f + mov $2, %eax + ret +1: + + call local_indirect_func@plt + cmp $0x56, %eax + je 1f + mov $3, %eax + ret +1: + + xor %eax, %eax + ret + +# Test that the indirect functions have the same addresses in both calls. +# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_2, 4) + next_pc(test_indirect_func_address_2) +test_indirect_func_address_1: + lea indirect_func(%rip), %rax + +test_indirect_func_address_2: + lea indirect_func(%rip), %rax + +# rtdyld-check: decode_operand(test_local_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_local_indirect_func_address_2, 4) + next_pc(test_indirect_func_address_2) +test_local_indirect_func_address_1: + lea local_indirect_func(%rip), %rax + +test_local_indirect_func_address_2: + lea local_indirect_func(%rip), %rax + +#--- func_defs.s + +# This file contains the external functions that are called above. The type of +# the indirect function is set to @gnu_indirect_function and its value is set +# to the value of ifunc_resolver. This is what gcc emits when using +# __attribute__((ifunc("ifunc_resolver"))) in C. The resolver function just +# returns the address of the real function "real_func". +# To test that everything works correctly, the indirect function returns 0x12 +# and the direct function returns 0x34. This is verified in the _main function +# above.
+ +real_func: + mov $0x12, %eax + ret + +ifunc_resolver: + lea real_func(%rip), %rax + ret + + .global indirect_func + .type indirect_func, @gnu_indirect_function + .set indirect_func, ifunc_resolver + + .global normal_func +normal_func: + mov $0x34, %eax + ret + +# Test that the address of the indirect function is equal even when it is +# defined in another object file. +# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_3, 4) + next_pc(test_indirect_func_address_3) +test_indirect_func_address_3: + lea indirect_func(%rip), %rax From 0c1f9b3f17bcb0639d5f2684771ef21c9508632c Mon Sep 17 00:00:00 2001 From: Anton Sidorenko Date: Tue, 1 Nov 2022 19:32:39 +0300 Subject: [PATCH 006/516] [MachineCombiner] Add `const` to `shouldReduceRegisterPressure` arguments. NFC Differential Revision: https://reviews.llvm.org/D137174 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 ++-- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 14 ++++++-------- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 6 +++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 40663f95fa0a8..189db64609a48 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1148,8 +1148,8 @@ class TargetInstrInfo : public MCInstrInfo { /// Return true if target supports reassociation of instructions in machine /// combiner pass to reduce register pressure for a given BB. 
virtual bool - shouldReduceRegisterPressure(MachineBasicBlock *MBB, - RegisterClassInfo *RegClassInfo) const { + shouldReduceRegisterPressure(const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo) const { return false; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 965bdaaa8ecc5..7a7dd2f30e943 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -613,7 +613,7 @@ void PPCInstrInfo::finalizeInsInstrs( } bool PPCInstrInfo::shouldReduceRegisterPressure( - MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const { + const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const { if (!EnableFMARegPressureReduction) return false; @@ -635,10 +635,11 @@ bool PPCInstrInfo::shouldReduceRegisterPressure( return false; const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); + const MachineFunction *MF = MBB->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); - auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector { + auto GetMBBPressure = + [&](const MachineBasicBlock *MBB) -> std::vector { RegionPressure Pressure; RegPressureTracker RPTracker(Pressure); @@ -646,10 +647,7 @@ bool PPCInstrInfo::shouldReduceRegisterPressure( RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); - for (MachineBasicBlock::iterator MII = MBB->instr_end(), - MIE = MBB->instr_begin(); - MII != MIE; --MII) { - MachineInstr &MI = *std::prev(MII); + for (const auto &MI : reverse(*MBB)) { if (MI.isDebugValue() || MI.isDebugLabel()) continue; RegisterOperands RegOpers; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 4c720e251f15c..9f150be7f746c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -471,9 +471,9 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// when the register pressure is high for one BB. /// Return true if register pressure for \p MBB is high and ABI is supported /// to reduce register pressure. Otherwise return false. - bool - shouldReduceRegisterPressure(MachineBasicBlock *MBB, - RegisterClassInfo *RegClassInfo) const override; + bool shouldReduceRegisterPressure( + const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo) const override; /// Fixup the placeholders we put in genAlternativeCodeSequence() for /// MachineCombiner. From 2e36dadbd6eab10641ac53969ae3dee0b0391e25 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Mon, 31 Oct 2022 14:16:03 +0100 Subject: [PATCH 007/516] [mlir] Make `OperationFingerPrint` class public It can be useful to external users as well for detecting if there were any changes in IR between passes. Differential Revision: https://reviews.llvm.org/D137078 --- mlir/include/mlir/IR/OperationSupport.h | 23 +++++++++++ mlir/lib/IR/OperationSupport.cpp | 40 ++++++++++++++++++ mlir/lib/Pass/IRPrinting.cpp | 55 ------------------------- 3 files changed, 63 insertions(+), 55 deletions(-) diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index 3ce7ff37c8252..24732decc856a 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -894,6 +894,29 @@ struct OperationEquivalence { /// Enable Bitmask enums for OperationEquivalence::Flags. LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +//===----------------------------------------------------------------------===// +// OperationFingerPrint +//===----------------------------------------------------------------------===// + +/// A unique fingerprint for a specific operation, and all of its internal +/// operations.
+class OperationFingerPrint { +public: + OperationFingerPrint(Operation *topOp); + OperationFingerPrint(const OperationFingerPrint &) = default; + OperationFingerPrint &operator=(const OperationFingerPrint &) = default; + + bool operator==(const OperationFingerPrint &other) const { + return hash == other.hash; + } + bool operator!=(const OperationFingerPrint &other) const { + return !(*this == other); + } + +private: + std::array hash; +}; + } // namespace mlir namespace llvm { diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp index 33828a954c633..d46f1b46bf7b8 100644 --- a/mlir/lib/IR/OperationSupport.cpp +++ b/mlir/lib/IR/OperationSupport.cpp @@ -16,6 +16,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/OpDefinition.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Support/SHA1.h" #include using namespace mlir; @@ -757,3 +758,42 @@ bool OperationEquivalence::isEquivalentTo( return false; return true; } + +//===----------------------------------------------------------------------===// +// OperationFingerPrint +//===----------------------------------------------------------------------===// + +template +static void addDataToHash(llvm::SHA1 &hasher, const T &data) { + hasher.update( + ArrayRef(reinterpret_cast(&data), sizeof(T))); +} + +OperationFingerPrint::OperationFingerPrint(Operation *topOp) { + llvm::SHA1 hasher; + + // Hash each of the operations based upon their mutable bits: + topOp->walk([&](Operation *op) { + // - Operation pointer + addDataToHash(hasher, op); + // - Attributes + addDataToHash(hasher, op->getAttrDictionary()); + // - Blocks in Regions + for (Region ®ion : op->getRegions()) { + for (Block &block : region) { + addDataToHash(hasher, &block); + for (BlockArgument arg : block.getArguments()) + addDataToHash(hasher, arg); + } + } + // - Location + addDataToHash(hasher, op->getLoc().getAsOpaquePointer()); + // - Operands + for (Value operand : op->getOperands()) + addDataToHash(hasher, operand); + // - 
Successors + for (unsigned i = 0, e = op->getNumSuccessors(); i != e; ++i) + addDataToHash(hasher, op->getSuccessor(i)); + }); + hash = hasher.result(); +} diff --git a/mlir/lib/Pass/IRPrinting.cpp b/mlir/lib/Pass/IRPrinting.cpp index c20d9b1e9135f..ee52bf81847c2 100644 --- a/mlir/lib/Pass/IRPrinting.cpp +++ b/mlir/lib/Pass/IRPrinting.cpp @@ -11,66 +11,11 @@ #include "mlir/Pass/PassManager.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/SHA1.h" using namespace mlir; using namespace mlir::detail; namespace { -//===----------------------------------------------------------------------===// -// OperationFingerPrint -//===----------------------------------------------------------------------===// - -/// A unique fingerprint for a specific operation, and all of it's internal -/// operations. -class OperationFingerPrint { -public: - OperationFingerPrint(Operation *topOp) { - llvm::SHA1 hasher; - - // Hash each of the operations based upon their mutable bits: - topOp->walk([&](Operation *op) { - // - Operation pointer - addDataToHash(hasher, op); - // - Attributes - addDataToHash(hasher, op->getAttrDictionary()); - // - Blocks in Regions - for (Region ®ion : op->getRegions()) { - for (Block &block : region) { - addDataToHash(hasher, &block); - for (BlockArgument arg : block.getArguments()) - addDataToHash(hasher, arg); - } - } - // - Location - addDataToHash(hasher, op->getLoc().getAsOpaquePointer()); - // - Operands - for (Value operand : op->getOperands()) - addDataToHash(hasher, operand); - // - Successors - for (unsigned i = 0, e = op->getNumSuccessors(); i != e; ++i) - addDataToHash(hasher, op->getSuccessor(i)); - }); - hash = hasher.result(); - } - - bool operator==(const OperationFingerPrint &other) const { - return hash == other.hash; - } - bool operator!=(const OperationFingerPrint &other) const { - return !(*this == other); - } - -private: - template - void addDataToHash(llvm::SHA1 &hasher, const T &data) { - 
hasher.update( - ArrayRef(reinterpret_cast(&data), sizeof(T))); - } - - std::array hash; -}; - //===----------------------------------------------------------------------===// // IRPrinter //===----------------------------------------------------------------------===// From a68bcd81dcc90fc7d6fbe4013569774a19097c4a Mon Sep 17 00:00:00 2001 From: Tom Praschan <13141438+tom-anders@users.noreply.github.com> Date: Wed, 2 Nov 2022 12:50:50 +0100 Subject: [PATCH 008/516] [clangd] Index unscoped enums in class scope for code completion Fixes https://github.com/clangd/clangd/issues/1082 Differential Revision: https://reviews.llvm.org/D136925 --- clang-tools-extra/clangd/CodeComplete.cpp | 8 +++- .../clangd/unittests/CodeCompleteTests.cpp | 41 ++++++++++++++++--- .../clangd/unittests/SymbolCollectorTests.cpp | 9 +++- .../clangd/unittests/TestIndex.cpp | 4 ++ .../clangd/unittests/TestIndex.h | 2 + 5 files changed, 57 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 5612fc599fb50..a3e518b4ba054 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -2123,6 +2123,9 @@ bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) { }; return false; }; + auto InClassScope = [](const NamedDecl &ND) { + return ND.getDeclContext()->getDeclKind() == Decl::CXXRecord; + }; // We only complete symbol's name, which is the same as the name of the // *primary* template in case of template specializations. if (isExplicitTemplateSpecialization(&ND)) @@ -2138,8 +2141,11 @@ bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) { if (InTopLevelScope(ND)) return true; + // Always index enum constants, even if they're not in the top level scope: + // when + // --all-scopes-completion is set, we'll want to complete those as well. 
if (const auto *EnumDecl = dyn_cast(ND.getDeclContext())) - return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped(); + return (InTopLevelScope(*EnumDecl) || InClassScope(*EnumDecl)) && !EnumDecl->isScoped(); return false; } diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index db700556e1d24..77451bf445e0f 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -2967,14 +2967,20 @@ TEST(CompletionTest, AllScopesCompletion) { } )cpp", {cls("nx::Clangd1"), cls("ny::Clangd2"), cls("Clangd3"), - cls("na::nb::Clangd4")}, + cls("na::nb::Clangd4"), enmConstant("na::C::Clangd5")}, Opts); EXPECT_THAT( Results.Completions, - UnorderedElementsAre(AllOf(qualifier("nx::"), named("Clangd1")), - AllOf(qualifier("ny::"), named("Clangd2")), - AllOf(qualifier(""), scope(""), named("Clangd3")), - AllOf(qualifier("nb::"), named("Clangd4")))); + UnorderedElementsAre(AllOf(qualifier("nx::"), named("Clangd1"), + kind(CompletionItemKind::Class)), + AllOf(qualifier("ny::"), named("Clangd2"), + kind(CompletionItemKind::Class)), + AllOf(qualifier(""), scope(""), named("Clangd3"), + kind(CompletionItemKind::Class)), + AllOf(qualifier("nb::"), named("Clangd4"), + kind(CompletionItemKind::Class)), + AllOf(qualifier("C::"), named("Clangd5"), + kind(CompletionItemKind::EnumMember)))); } TEST(CompletionTest, NoQualifierIfShadowed) { @@ -3358,6 +3364,31 @@ TEST(CompletionTest, UsingDecl) { kind(CompletionItemKind::Reference)))); } +TEST(CompletionTest, Enums) { + const char *Header(R"cpp( + namespace ns { + enum Unscoped { Clangd1 }; + class C { + enum Unscoped { Clangd2 }; + }; + enum class Scoped { Clangd3 }; + })cpp"); + const char *Source(R"cpp( + void bar() { + Clangd^ + })cpp"); + auto Index = TestTU::withHeaderCode(Header).index(); + clangd::CodeCompleteOptions Opts; + Opts.Index = Index.get(); + Opts.AllScopes = true; + auto R = 
completions(Source, {}, Opts); + EXPECT_THAT(R.Completions, + ElementsAre(AllOf(scope("ns::"), named("Clangd1"), + kind(CompletionItemKind::EnumMember)), + AllOf(scope("ns::C::"), named("Clangd2"), + kind(CompletionItemKind::EnumMember)))); +} + TEST(CompletionTest, ScopeIsUnresolved) { clangd::CodeCompleteOptions Opts = {}; Opts.AllScopes = true; diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 8dc7877c17849..bb651b851afeb 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -1316,6 +1316,11 @@ TEST_F(SymbolCollectorTest, IncludeEnums) { Black }; } + class Color3 { + enum { + Blue + }; + }; )"; runSymbolCollector(Header, /*Main=*/""); EXPECT_THAT(Symbols, @@ -1326,7 +1331,9 @@ TEST_F(SymbolCollectorTest, IncludeEnums) { AllOf(qName("Color2"), forCodeCompletion(true)), AllOf(qName("Color2::Yellow"), forCodeCompletion(false)), AllOf(qName("ns"), forCodeCompletion(true)), - AllOf(qName("ns::Black"), forCodeCompletion(true)))); + AllOf(qName("ns::Black"), forCodeCompletion(true)), + AllOf(qName("Color3"), forCodeCompletion(true)), + AllOf(qName("Color3::Blue"), forCodeCompletion(true)))); } TEST_F(SymbolCollectorTest, NamelessSymbols) { diff --git a/clang-tools-extra/clangd/unittests/TestIndex.cpp b/clang-tools-extra/clangd/unittests/TestIndex.cpp index c247a9c2e90c9..11282bc34231a 100644 --- a/clang-tools-extra/clangd/unittests/TestIndex.cpp +++ b/clang-tools-extra/clangd/unittests/TestIndex.cpp @@ -69,6 +69,10 @@ Symbol enm(llvm::StringRef Name) { return sym(Name, index::SymbolKind::Enum, "@E@\\0"); } +Symbol enmConstant(llvm::StringRef Name) { + return sym(Name, index::SymbolKind::EnumConstant, "@\\0"); +} + Symbol var(llvm::StringRef Name) { return sym(Name, index::SymbolKind::Variable, "@\\0"); } diff --git a/clang-tools-extra/clangd/unittests/TestIndex.h 
b/clang-tools-extra/clangd/unittests/TestIndex.h index 0cd8a713c31dd..9280b0b12a67f 100644 --- a/clang-tools-extra/clangd/unittests/TestIndex.h +++ b/clang-tools-extra/clangd/unittests/TestIndex.h @@ -27,6 +27,8 @@ Symbol func(llvm::StringRef Name); Symbol cls(llvm::StringRef Name); // Creates an enum symbol. Symbol enm(llvm::StringRef Name); +// Creates an enum constant symbol. +Symbol enmConstant(llvm::StringRef Name); // Creates a variable symbol. Symbol var(llvm::StringRef Name); // Creates a namespace symbol. From 2baabd2c19ee972926a96fa01838ccb7901cac32 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 2 Nov 2022 18:02:59 +0700 Subject: [PATCH 009/516] [LoopPredication][NFCI] Perform 'visited' check before pushing to worklist This prevents duplicates to be pushed into the stack and hypothetically should reduce memory footprint on ugly cornercases with multiple repeating duplicates in 'and' tree. --- llvm/lib/Transforms/Scalar/LoopPredication.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index fbd4a39c7949e..1e4060abeb885 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -763,17 +763,17 @@ unsigned LoopPredication::collectChecks(SmallVectorImpl &Checks, // resulting list of subconditions in Checks vector. 
SmallVector Worklist(1, Condition); SmallPtrSet Visited; + Visited.insert(Condition); Value *WideableCond = nullptr; do { Value *Condition = Worklist.pop_back_val(); - if (!Visited.insert(Condition).second) - continue; - Value *LHS, *RHS; using namespace llvm::PatternMatch; if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) { - Worklist.push_back(LHS); - Worklist.push_back(RHS); + if (Visited.insert(LHS).second) + Worklist.push_back(LHS); + if (Visited.insert(RHS).second) + Worklist.push_back(RHS); continue; } From 86dc6a3c0feeaca5ee442dd624ef4faaa895886e Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 2 Nov 2022 11:03:59 +0000 Subject: [PATCH 010/516] [AMDGPU] Constify a couple of methods. NFC. --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5e0ae4c2581f6..4314595684710 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -216,7 +216,7 @@ class WaitcntBrackets { } // Mapping from event to counter. - InstCounterType eventCounter(WaitEventType E) { + InstCounterType eventCounter(WaitEventType E) const { for (auto T : inst_counter_types()) { if (WaitEventMaskForInst[T] & (1 << E)) return T; @@ -471,7 +471,7 @@ class SIInsertWaitcnts : public MachineFunctionPass { bool applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets, MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait, - MachineBasicBlock::instr_iterator It); + MachineBasicBlock::instr_iterator It) const; }; } // end anonymous namespace @@ -850,7 +850,7 @@ FunctionPass *llvm::createSIInsertWaitcntsPass() { /// preexisting waitcnt are required for correctness. 
bool SIInsertWaitcnts::applyPreexistingWaitcnt( WaitcntBrackets &ScoreBrackets, MachineInstr &OldWaitcntInstr, - AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) { + AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) const { bool Modified = false; MachineInstr *WaitcntInstr = nullptr; MachineInstr *WaitcntVsCntInstr = nullptr; From 9c05b7c06f0d0ed69ae9f9f8c581566357b235ad Mon Sep 17 00:00:00 2001 From: Moritz Sichert Date: Wed, 2 Nov 2022 12:08:52 +0100 Subject: [PATCH 011/516] [RuntimeDyld] Require x86_64-linux for IFUNC test This test was introduced by a667aa4de041816cb4865bce8f523228f2332ffa. It assumes that it runs on x86_64 on linux, so require that for the test. --- llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s index ed8eb8833ded6..adcd27613ea96 100644 --- a/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s +++ b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s @@ -1,3 +1,4 @@ +# REQUIRES: x86_64-linux # RUN: rm -rf %t && mkdir -p %t # RUN: split-file %s %t # RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/test_runner.o %t/test_runner.s From 7e1963b1917f533d4283da43a8d016e2138c9cf6 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 1 Nov 2022 16:22:45 +0100 Subject: [PATCH 012/516] [AMDGPU][NFC] Split MC tests into promoted from VOP2 to VOP3 and only VOP3 Differential Revision: https://reviews.llvm.org/D136148 --- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 2275 +---------------- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 2002 +-------------- .../AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s | 1986 ++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 546 ---- .../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s | 550 ++++ .../test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s | 2187 ++++++++++++++++ 
.../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 1846 +------------ .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 1702 +----------- .../gfx11_dasm_vop3_dpp16_from_vop2.txt | 1698 ++++++++++++ .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 396 +-- .../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt | 396 +++ .../AMDGPU/gfx11_dasm_vop3_from_vop2.txt | 1842 +++++++++++++ 12 files changed, 8722 insertions(+), 8704 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop2.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 3df2843deade6..d7908d75ecf91 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -48,113 +48,6 @@ v_add3_u32 v5, src_scc, vcc_lo, -1 v_add3_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_add_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, v255, src_scc, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0xff,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, s105, s105, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, m0, 0.5, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s105, null, exec_hi, s105 -// W32: encoding: [0x05,0x69,0x20,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo -// W32: encoding: [0x05,0x6a,0x20,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi -// W32: encoding: [0x05,0x6b,0x20,0xd5,0xf0,0xd4,0xac,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 -// W32: encoding: [0x05,0x7b,0x20,0xd5,0xfd,0xf8,0xec,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, 
s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] -// W64: encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc -// W64: encoding: 
[0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] -// W64: encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - v_add_co_u32 v5, s6, v1, v2 // W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x05,0x02,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -270,96 +163,6 @@ v_add_co_u32 v5, ttmp[14:15], src_scc, vcc_lo v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_add_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] - -v_add_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] - -v_add_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] - -v_add_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] - -v_add_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] - -v_add_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_add_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] - -v_add_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] - -v_add_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] - -v_add_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] - -v_add_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] - -v_add_f16_e64 v5, -1, exec_hi -// GFX11: encoding: 
[0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] - -v_add_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] - -v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] - -v_add_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_add_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] - -v_add_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] - -v_add_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] - -v_add_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] - -v_add_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] - -v_add_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_add_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] - -v_add_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] - -v_add_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] - -v_add_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] - -v_add_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] - -v_add_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] - -v_add_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] - -v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] - -v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_add_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: 
[0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] @@ -576,51 +379,6 @@ v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp // GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_add_nc_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] - -v_add_nc_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] - -v_add_nc_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] - -v_add_nc_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] - -v_add_nc_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] - -v_add_nc_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_add_nc_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] - -v_add_nc_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] - -v_add_nc_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] - -v_add_nc_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] - -v_add_nc_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] - -v_add_nc_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] - -v_add_nc_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] - -v_add_nc_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] - -v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_alignbit_b32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] @@ -756,51 +514,6 @@ v_and_b16 v5, src_scc, vcc_lo v_and_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: 
[0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_and_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] - -v_and_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] - -v_and_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] - -v_and_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] - -v_and_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] - -v_and_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_and_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] - -v_and_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] - -v_and_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] - -v_and_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] - -v_and_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] - -v_and_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] - -v_and_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] - -v_and_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] - -v_and_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_and_or_b32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] @@ -891,51 +604,6 @@ v_ashrrev_i16 v5, src_scc, vcc_lo v_ashrrev_i16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_ashrrev_i32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] - -v_ashrrev_i32_e64 v5, v255, v255 -// GFX11: encoding: 
[0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] - -v_ashrrev_i32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] - -v_ashrrev_i32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] - -v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] - -v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_ashrrev_i32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] - -v_ashrrev_i32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] - -v_ashrrev_i32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] - -v_ashrrev_i32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] - -v_ashrrev_i32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] - -v_ashrrev_i32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] - -v_ashrrev_i32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] - -v_ashrrev_i32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] - -v_ashrrev_i32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_ashrrev_i64 v[5:6], v1, vcc // GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] @@ -1548,113 +1216,6 @@ v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null // GFX11: encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cndmask_b32_e64 v5, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, v255, src_scc, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x0d,0x00] 
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, s105, s105, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, m0, 0.5, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, exec_lo, exec_lo, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, exec_hi, -1, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, null, exec_hi, s105 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, -1, m0, vcc_lo -// W32: encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_hi -// W32: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xac,0x41] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp15 -// W32: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xec,0x21] -// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, s105, s105, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, -1, m0, s[104:105] -// W64: encoding: 
[0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc -// W64: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp[14:15] -// W64: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null -// GFX11: encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] - v_cos_f16_e64 v5, v1 // GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] @@ -3212,51 +2773,6 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: 
[0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] - -v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] - -v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp -// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - v_cvt_pk_u16_f32 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] @@ -3572,51 +3088,6 @@ v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| // GFX11: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_pkrtz_f16_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pkrtz_f16_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] - 
-v_cvt_pkrtz_f16_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] - -v_cvt_pkrtz_f16_f32_e64 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] - -v_cvt_pkrtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp -// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - v_cvt_rpi_i32_f32_e64 v5, v1 // GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] @@ -4928,251 +4399,71 @@ v_fma_legacy_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_fma_legacy_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_fmac_dx9_zero_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] - -v_fmac_dx9_zero_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] +v_fract_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] +v_fract_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] +v_fract_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] +v_fract_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_fract_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] 
-v_fmac_dx9_zero_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] +v_fract_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] +v_fract_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] +v_fract_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] +v_fract_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] +v_fract_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] +v_fract_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] +v_fract_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] -v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] +v_fract_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] -v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +v_fract_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] -v_fmac_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: 
[0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -v_fmac_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] +v_fract_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] -v_fmac_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] +v_fract_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] -v_fmac_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] +v_fract_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] -v_fmac_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] +v_fract_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] -v_fmac_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_fract_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] -v_fmac_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] +v_fract_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] -v_fmac_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] - -v_fmac_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] - -v_fmac_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] - -v_fmac_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] - -v_fmac_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] - -v_fmac_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] - -v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] - -v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: 
[0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_fmac_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] - -v_fmac_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] - -v_fmac_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] - -v_fmac_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] - -v_fmac_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] - -v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_fmac_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] - -v_fmac_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] - -v_fmac_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] - -v_fmac_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] - -v_fmac_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] - -v_fmac_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] - -v_fmac_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] - -v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] - -v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - -v_fmac_legacy_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] - -v_fmac_legacy_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] - -v_fmac_legacy_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] - -v_fmac_legacy_f32_e64 v5, s105, s105 -// GFX11: encoding: 
[0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] - -v_fmac_legacy_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] - -v_fmac_legacy_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_fmac_legacy_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] - -v_fmac_legacy_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] - -v_fmac_legacy_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] - -v_fmac_legacy_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] - -v_fmac_legacy_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] - -v_fmac_legacy_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] - -v_fmac_legacy_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] - -v_fmac_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] - -v_fmac_legacy_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - -v_fract_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] - -v_fract_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] - -v_fract_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] - -v_fract_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] - -v_fract_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] - -v_fract_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] - -v_fract_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] - -v_fract_f16_e64 v5, m0 -// GFX11: encoding: 
[0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] - -v_fract_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] - -v_fract_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] - -v_fract_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] - -v_fract_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] - -v_fract_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] - -v_fract_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] - -v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_fract_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] - -v_fract_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] - -v_fract_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] - -v_fract_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] - -v_fract_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] - -v_fract_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] - -v_fract_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] +v_fract_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] v_fract_f32_e64 v5, m0 // GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] @@ -5486,51 +4777,6 @@ v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 // GFX11: encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -v_ldexp_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] - -v_ldexp_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] - -v_ldexp_f16_e64 v5, s1, s2 -// GFX11: encoding: 
[0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] - -v_ldexp_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] - -v_ldexp_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] - -v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_ldexp_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] - -v_ldexp_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] - -v_ldexp_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] - -v_ldexp_f16_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] - -v_ldexp_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] - -v_ldexp_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] - -v_ldexp_f16_e64 v5, 0.5, m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] - -v_ldexp_f16_e64 v5, src_scc, vcc_lo mul:4 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] - -v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 -// GFX11: encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] - v_ldexp_f32 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] @@ -5891,51 +5137,6 @@ v_lshlrev_b16 v5, src_scc, vcc_lo v_lshlrev_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshlrev_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] - -v_lshlrev_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] - -v_lshlrev_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] - -v_lshlrev_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] - -v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: 
encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] - -v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_lshlrev_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] - -v_lshlrev_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] - -v_lshlrev_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] - -v_lshlrev_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] - -v_lshlrev_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] - -v_lshlrev_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] - -v_lshlrev_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] - -v_lshlrev_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] - -v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_lshlrev_b64 v[5:6], v1, vcc // GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] @@ -6008,51 +5209,6 @@ v_lshrrev_b16 v5, src_scc, vcc_lo v_lshrrev_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshrrev_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] - -v_lshrrev_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] - -v_lshrrev_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] - -v_lshrrev_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] - -v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] - -v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - 
-v_lshrrev_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] - -v_lshrrev_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] - -v_lshrrev_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] - -v_lshrrev_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] - -v_lshrrev_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] - -v_lshrrev_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] - -v_lshrrev_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] - -v_lshrrev_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] - -v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_lshrrev_b64 v[5:6], v1, vcc // GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] @@ -6770,96 +5926,6 @@ v_max3_u32 v5, src_scc, vcc_lo, -1 v_max3_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_max_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] - -v_max_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] - -v_max_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] - -v_max_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] - -v_max_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_max_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] - 
-v_max_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] - -v_max_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] - -v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] - -v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_max_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] - -v_max_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] - -v_max_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] - -v_max_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] - -v_max_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_max_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] - -v_max_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] - -v_max_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: 
[0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] - -v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] - -v_max_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_max_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] @@ -6941,51 +6007,6 @@ v_max_i16 v5, src_scc, vcc_lo v_max_i16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_max_i32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] - -v_max_i32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] - -v_max_i32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] - -v_max_i32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] - -v_max_i32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_i32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_max_i32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_i32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] - -v_max_i32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] - -v_max_i32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_i32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_i32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_i32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] - -v_max_i32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] - -v_max_i32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: 
[0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_max_u16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] @@ -7031,51 +6052,6 @@ v_max_u16 v5, src_scc, vcc_lo v_max_u16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_max_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] - -v_max_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] - -v_max_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] - -v_max_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] - -v_max_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_max_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] - -v_max_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] - -v_max_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] - -v_max_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] - -v_max_u32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_maxmin_f16 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] @@ -7886,96 +6862,6 @@ v_min3_u32 v5, src_scc, vcc_lo, -1 v_min3_u32 v255, 0xaf123456, vcc_hi, null // 
GFX11: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_min_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] - -v_min_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] - -v_min_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] - -v_min_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] - -v_min_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_min_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] - -v_min_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] - -v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] - -v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_min_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] - -v_min_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] - -v_min_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] - -v_min_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] - -v_min_f32_e64 v5, vcc_lo, 
ttmp15 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_min_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] - -v_min_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] - -v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] - -v_min_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_min_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] @@ -8057,51 +6943,6 @@ v_min_i16 v5, src_scc, vcc_lo v_min_i16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_i32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] - -v_min_i32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] - -v_min_i32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] - -v_min_i32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] - -v_min_i32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_i32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_min_i32_e64 
v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_i32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_i32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] - -v_min_i32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_i32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_i32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_i32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] - -v_min_i32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] - -v_min_i32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_min_u16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] @@ -8147,53 +6988,8 @@ v_min_u16 v5, src_scc, vcc_lo v_min_u16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] - -v_min_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] - -v_min_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] - -v_min_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] - -v_min_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_min_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: 
[0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] - -v_min_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] - -v_min_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] - -v_min_u32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_minmax_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +v_minmax_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] v_minmax_f16 v5, v255, s2, s105 // GFX11: encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] @@ -8615,141 +7411,6 @@ v_msad_u8 v5, src_scc, vcc_lo, -1 v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp // GFX11: encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_mul_dx9_zero_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] - -v_mul_dx9_zero_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] - -v_mul_dx9_zero_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_dx9_zero_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_dx9_zero_f32_e64 
v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - -v_mul_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] - -v_mul_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] - -v_mul_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] - -v_mul_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_mul_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: 
encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_mul_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] - -v_mul_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] - -v_mul_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] - -v_mul_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_mul_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] @@ -8831,51 +7492,6 @@ v_mul_hi_i32 v5, src_scc, vcc_lo v_mul_hi_i32 v255, 
0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_mul_hi_i32_i24_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] - -v_mul_hi_i32_i24_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] - -v_mul_hi_i32_i24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_hi_i32_i24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_hi_i32_i24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_hi_i32_i24_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_mul_hi_u32 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] @@ -8921,141 +7537,6 @@ v_mul_hi_u32 v5, src_scc, vcc_lo v_mul_hi_u32 v255, 0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_mul_hi_u32_u24_e64 v5, v1, v2 -// 
GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] - -v_mul_hi_u32_u24_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] - -v_mul_hi_u32_u24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_hi_u32_u24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_hi_u32_u24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_hi_u32_u24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_hi_u32_u24_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_mul_i32_i24_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] - -v_mul_i32_i24_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] - -v_mul_i32_i24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] - -v_mul_i32_i24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 -// GFX11: 
encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_i32_i24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_i32_i24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_i32_i24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_i32_i24_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_i32_i24_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_i32_i24_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_i32_i24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_i32_i24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_mul_legacy_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] - -v_mul_legacy_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] - -v_mul_legacy_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] - -v_mul_legacy_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_legacy_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_legacy_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_legacy_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_legacy_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_legacy_f32_e64 v5, exec_lo, -1 
-// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_legacy_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_legacy_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_legacy_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_legacy_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_legacy_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_mul_lo_u16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] @@ -9146,51 +7627,6 @@ v_mul_lo_u32 v5, src_scc, vcc_lo v_mul_lo_u32 v255, 0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_mul_u32_u24_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] - -v_mul_u32_u24_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] - -v_mul_u32_u24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] - -v_mul_u32_u24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_u32_u24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_u32_u24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_u32_u24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_u32_u24_e64 v5, exec_hi, null -// GFX11: encoding: 
[0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_u32_u24_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_u32_u24_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_u32_u24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_u32_u24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_mullit_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] @@ -9419,51 +7855,6 @@ v_or_b16 v5, src_scc, vcc_lo v_or_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_or_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] - -v_or_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] - -v_or_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] - -v_or_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] - -v_or_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] - -v_or_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_or_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] - -v_or_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] - -v_or_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] - -v_or_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] - -v_or_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] - -v_or_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] - 
-v_or_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] - -v_or_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] - -v_or_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_pack_b32_f16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] @@ -10559,113 +8950,6 @@ v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 // GFX11: encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -v_sub_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, v255, src_scc, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0xff,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, s105, s105, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, m0, 0.5, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 -// W32: encoding: 
[0x05,0x06,0x21,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s105, null, exec_hi, s105 -// W32: encoding: [0x05,0x69,0x21,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo -// W32: encoding: [0x05,0x6a,0x21,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi -// W32: encoding: [0x05,0x6b,0x21,0xd5,0xf0,0xd4,0xac,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 -// W32: encoding: [0x05,0x7b,0x21,0xd5,0xfd,0xf8,0xec,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] -// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] -// W64: encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc -// W64: encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] -// W64: encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - v_sub_co_u32 v5, s6, v1, v2 // W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x05,0x02,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -10781,96 +9065,6 @@ v_sub_co_u32 v5, 
ttmp[14:15], src_scc, vcc_lo v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_sub_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] - -v_sub_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] - -v_sub_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] - -v_sub_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] - -v_sub_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] - -v_sub_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_sub_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] - -v_sub_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] - -v_sub_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] - -v_sub_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] - -v_sub_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] - -v_sub_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] - -v_sub_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] - -v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] - -v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_sub_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] - -v_sub_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] - -v_sub_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] - -v_sub_f32_e64 v5, s105, s105 -// GFX11: 
encoding: [0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] - -v_sub_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] - -v_sub_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_sub_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] - -v_sub_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] - -v_sub_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] - -v_sub_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] - -v_sub_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] - -v_sub_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] - -v_sub_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] - -v_sub_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] - -v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_sub_nc_i16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] @@ -11006,158 +9200,6 @@ v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp // GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_sub_nc_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] - -v_sub_nc_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] - -v_sub_nc_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] - -v_sub_nc_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] - -v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] - 
-v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_sub_nc_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] - -v_sub_nc_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] - -v_sub_nc_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] - -v_sub_nc_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] - -v_sub_nc_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] - -v_sub_nc_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] - -v_sub_nc_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] - -v_sub_nc_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] - -v_sub_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_subrev_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, v255, src_scc, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0xff,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, s105, s105, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, m0, 0.5, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s105, null, exec_hi, s105 -// W32: encoding: [0x05,0x69,0x22,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo -// W32: encoding: [0x05,0x6a,0x22,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi -// W32: encoding: [0x05,0x6b,0x22,0xd5,0xf0,0xd4,0xac,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 -// W32: encoding: [0x05,0x7b,0x22,0xd5,0xfd,0xf8,0xec,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] -// W64: encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc -// W64: encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] -// W64: encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - v_subrev_co_u32 v5, s6, v1, v2 // W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -11273,141 +9315,6 @@ v_subrev_co_u32 v5, ttmp[14:15], src_scc, vcc_lo v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_subrev_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] - -v_subrev_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] - -v_subrev_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] - -v_subrev_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] - -v_subrev_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] - -v_subrev_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_subrev_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] - -v_subrev_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] - -v_subrev_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] - -v_subrev_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] - -v_subrev_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] - -v_subrev_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] - -v_subrev_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: 
encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] - -v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] - -v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_subrev_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] - -v_subrev_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] - -v_subrev_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] - -v_subrev_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] - -v_subrev_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] - -v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_subrev_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] - -v_subrev_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] - -v_subrev_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] - -v_subrev_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] - -v_subrev_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] - -v_subrev_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] - -v_subrev_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] - -v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] - -v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - -v_subrev_nc_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] - -v_subrev_nc_u32_e64 
v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] - -v_subrev_nc_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] - -v_subrev_nc_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] - -v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] - -v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_subrev_nc_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] - -v_subrev_nc_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] - -v_subrev_nc_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] - -v_subrev_nc_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] - -v_subrev_nc_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] - -v_subrev_nc_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] - -v_subrev_nc_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] - -v_subrev_nc_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] - -v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_trig_preop_f64 v[5:6], v[1:2], v2 // GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] @@ -11663,51 +9570,6 @@ v_xad_u32 v5, src_scc, vcc_lo, -1 v_xad_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_xnor_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] - -v_xnor_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] - -v_xnor_b32_e64 v5, s1, s2 -// GFX11: encoding: 
[0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] - -v_xnor_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] - -v_xnor_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] - -v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_xnor_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] - -v_xnor_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] - -v_xnor_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] - -v_xnor_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] - -v_xnor_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] - -v_xnor_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] - -v_xnor_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] - -v_xnor_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] - -v_xnor_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_xor3_b32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] @@ -11797,48 +9659,3 @@ v_xor_b16 v5, src_scc, vcc_lo v_xor_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_xor_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] - -v_xor_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] - -v_xor_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] - -v_xor_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] - -v_xor_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] - 
-v_xor_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_xor_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] - -v_xor_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] - -v_xor_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] - -v_xor_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] - -v_xor_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] - -v_xor_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] - -v_xor_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] - -v_xor_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] - -v_xor_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 0ae183f504875..fb3a7f4524059 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -45,113 +45,6 @@ v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, 
v2, s3 row_mirror -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 -// W32: [0x05,0x69,0x20,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: 
[0x05,0x6b,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x7b,0x20,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 -// W64: 
[0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] - v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -259,90 +152,6 @@ v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: 
[0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: 
[0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -511,48 +320,6 @@ v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctr v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -679,48 +446,6 @@ v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -805,48 +530,6 @@ v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1374,113 +1057,6 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_m v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_mirror -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_half_mirror -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:1 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:15 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:1 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:15 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_ror:1 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s105 row_ror:15 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: 
[0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
- -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x03,0x01,0xd5,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] - v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] @@ -2741,48 +2317,6 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - 
-v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -3077,48 +2611,6 @@ v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: 
[0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] @@ -3959,48 +3451,6 @@ v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: 
[0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] - -v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] - v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4295,48 +3745,6 @@ v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ct v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_lshlrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4379,48 +3787,6 @@ v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - 
-v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4925,104 +4291,20 @@ v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_max_i16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_max_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: 
[0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: 
[0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_i16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_i16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_i16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_max_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] v_max_i16_e64_dpp v5, v1, v2 row_shl:15 // GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] @@ -5051,48 +4333,6 @@ v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: 
[0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -5135,48 
+4375,6 @@ v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5933,90 +5131,6 @@ v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_min_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_min_f32_e64_dpp v255, -|v255|, -|v255| 
clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -6059,48 +5173,6 @@ v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -6143,48 +5215,6 @@ v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -6605,300 +5635,6 @@ v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: 
[0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 -// 
GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: 
[0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, 
v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_hi_u32_u24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - 
-v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -6941,48 +5677,6 @@ v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - 
-v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -7193,48 +5887,6 @@ v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_or_b32_e64_dpp v5, 
v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -7991,113 +6643,6 @@ v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 -// W32: [0x05,0x69,0x21,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: [0x05,0x6b,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x7b,0x21,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] - v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -8205,90 +6750,6 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: 
[0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: 
[0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -8415,155 +6876,6 @@ v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] 
-// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: 
[0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 -// W32: [0x05,0x69,0x22,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: [0x05,0x6b,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x7b,0x22,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 -// W64: 
[0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] - v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -8671,132 +6983,6 @@ 
v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_ma v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_subrev_f16_e64_dpp v255, -|v255|, -|v255| 
clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_trunc_f16_e64_dpp v5, v1 
quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] @@ -8923,48 +7109,6 @@ v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -9049,48 +7193,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_share:15 
row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] @@ -9157,90 +7259,6 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_fmac_f16_e64_dpp v5, 
v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX11: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s new file mode 100644 index 0000000000000..2e7d1f6df12d4 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s @@ -0,0 +1,1986 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: 
[0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 +// W32: [0x05,0x69,0x20,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x20,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: 
[0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: 
[0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: 
[0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: 
[0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: 
[0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_mirror +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_half_mirror +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:1 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:15 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:1 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:15 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_ror:1 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s105 row_ror:15 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
[0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x01,0xd5,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + 
+v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, 
v2 row_ror:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 
row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] + +v_ldexp_f16_e64_dpp v5, 
v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] + +v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] + +v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshlrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: 
[0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_u32_e64_dpp 
v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: 
[0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: 
[0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_min_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: 
[0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_u32_e64_dpp 
v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + 
+v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + 
+v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_f32_e64_dpp 
v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mul_hi_u32_u24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + 
+v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_or_b32_e64_dpp 
v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 +// W32: [0x05,0x69,0x21,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: 
[0x05,0x6a,0x21,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x21,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, 
v2, s[6:7] row_shr:1 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: 
[0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: 
[0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: 
[0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 +// W32: [0x05,0x69,0x22,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x22,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 
row_shl:1 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 
row_shl:15 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + 
+v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_xnor_b32_e64_dpp v5, 
v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: 
[0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index e9e5321a0adea..6440cd4c9b844 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -39,45 +39,6 @@ v_add3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x55,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x06,0x20,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x69,0x20,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] 
-// W32: [0x05,0x6b,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: [0x05,0x7b,0x20,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: [0x05,0x7a,0x20,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0xfc,0x20,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] - v_add_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -117,30 +78,6 @@ v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x00,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x32,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x32,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x03,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x03,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -204,15 +141,6 @@ v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x25,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -288,15 +216,6 @@ 
v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x62,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -342,15 +261,6 @@ v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x3a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -567,45 +477,6 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s105 
dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: [0x05,0x02,0x01,0xd5,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: [0x05,0x02,0x01,0xd5,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x03,0x01,0xd5,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] - v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] @@ -1032,18 +903,6 
@@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] - -v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] - v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1149,18 +1008,6 @@ v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] - -v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] - v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] @@ -1446,18 +1293,6 @@ v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 
v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x81,0xc0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] - -v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x3b,0xd5,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] - v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1611,15 +1446,6 @@ v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x18,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1629,15 +1455,6 @@ v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshrrev_b32_e64_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x19,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2058,30 +1875,6 @@ v_max3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x1e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x39,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x39,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_max_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x10,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x10,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2091,15 +1884,6 @@ v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 
// GFX11: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x12,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2109,15 +1893,6 @@ v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x14,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2700,30 +2475,6 @@ v_min3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x1b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x3a,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_min_f16_e64_dpp 
v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x3a,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x0f,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x0f,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2733,15 +2484,6 @@ v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x11,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2751,15 +2493,6 @@ v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: 
[0x05,0x00,0x13,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x13,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2985,81 +2718,6 @@ v_msad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x39,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x35,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x35,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - 
-v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x08,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x08,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x0a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x0c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x09,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - 
-v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3069,15 +2727,6 @@ v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x05,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x0b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3177,15 +2826,6 @@ v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3519,45 +3159,6 @@ v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: 
[0xff,0x81,0xb3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x06,0x21,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x69,0x21,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6b,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: [0x05,0x7b,0x21,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: 
[0x05,0x7a,0x21,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0xfc,0x21,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] - v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3597,30 +3198,6 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x33,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x33,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x04,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x04,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: 
[0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3648,54 +3225,6 @@ v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x26,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x06,0x22,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x69,0x22,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6b,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: [0x05,0x7b,0x22,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: [0x05,0x7a,0x22,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0xfc,0x22,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] - v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3735,39 +3264,6 @@ v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x02,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x34,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x34,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_subrev_f32_e64_dpp 
v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x05,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x05,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x27,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] @@ -3828,15 +3324,6 @@ v_xad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x45,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1e,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3882,15 +3369,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] 
fi:0 // GFX11: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1d,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3957,30 +3435,6 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x2b,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: 
[0xff,0x83,0x2b,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s new file mode 100644 index 0000000000000..6e1a65d7d3210 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s @@ -0,0 +1,550 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x20,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x20,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x20,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x20,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x20,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x32,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x32,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x03,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x03,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x25,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x25,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s105 
dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x02,0x01,0xd5,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x01,0xd5,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x01,0xd5,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + 
+v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x02,0x2b,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x2b,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x3b,0xd5,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] + +v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x18,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x18,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x19,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x02,0x39,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x39,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_max_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x10,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x10,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x12,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x14,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x3a,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + 
+v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x3a,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x0f,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x0f,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x11,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x11,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x13,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, 
-|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x35,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x35,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x08,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x08,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x0a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24_e64_dpp v255, v255, v255 
dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x0c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x09,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// 
W32: [0x05,0x06,0x21,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x21,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x21,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x21,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v255, 
null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x21,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x33,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x33,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x04,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x04,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x26,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x22,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: 
[0x05,0x69,0x22,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x22,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x22,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x22,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x34,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x34,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x05,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x05,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x27,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1e,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1d,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s new file mode 100644 index 0000000000000..43c71617bb385 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s @@ -0,0 +1,2187 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, v255, src_scc, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, s105, s105, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, 
s6, vcc_hi, v255, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, m0, 0.5, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s105, null, exec_hi, s105 +// W32: encoding: [0x05,0x69,0x20,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo +// W32: encoding: [0x05,0x6a,0x20,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi +// W32: encoding: [0x05,0x6b,0x20,0xd5,0xf0,0xd4,0xac,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 +// W32: encoding: [0x05,0x7b,0x20,0xd5,0xfd,0xf8,0xec,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] +// W64: encoding: 
[0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] +// W64: encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc +// W64: encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] +// W64: encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] + +v_add_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] + +v_add_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] + +v_add_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] + +v_add_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] + +v_add_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] + +v_add_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] + +v_add_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] + +v_add_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] + +v_add_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] + +v_add_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] + +v_add_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] + +v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] + +v_add_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_add_f32_e64 v5, v1, v2 +// GFX11: encoding: 
[0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] + +v_add_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] + +v_add_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] + +v_add_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] + +v_add_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] + +v_add_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] + +v_add_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] + +v_add_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] + +v_add_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] + +v_add_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] + +v_add_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] + +v_add_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] + +v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] + +v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_add_nc_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] + +v_add_nc_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] + +v_add_nc_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] + +v_add_nc_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] + +v_add_nc_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] + +v_add_nc_u32_e64 v5, vcc_hi, 
0xaf123456 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] + +v_add_nc_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] + +v_add_nc_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] + +v_add_nc_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] + +v_add_nc_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] + +v_add_nc_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] + +v_add_nc_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] + +v_add_nc_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] + +v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_and_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] + +v_and_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] + +v_and_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] + +v_and_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] + +v_and_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] + +v_and_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_and_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] + +v_and_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] + +v_and_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] + +v_and_b32_e64 v5, exec_hi, null +// GFX11: encoding: 
[0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] + +v_and_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] + +v_and_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] + +v_and_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] + +v_and_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] + +v_and_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] + +v_ashrrev_i32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] + +v_ashrrev_i32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] + +v_ashrrev_i32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] + +v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] + +v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] + +v_ashrrev_i32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] + +v_ashrrev_i32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] + +v_ashrrev_i32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] + +v_ashrrev_i32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] + +v_ashrrev_i32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] + +v_ashrrev_i32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] + +v_ashrrev_i32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] + +v_ashrrev_i32_e64 
v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cndmask_b32_e64 v5, v1, 0xaf123456, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, v255, src_scc, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, s105, s105, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_hi, v255, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, m0, 0.5, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, exec_hi, -1, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, null, exec_hi, s105 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -1, m0, vcc_lo +// W32: encoding: 
[0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_hi +// W32: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xac,0x41] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp15 +// W32: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xec,0x21] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, s105, s105, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -1, m0, s[104:105] +// W64: encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc +// W64: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp[14:15] +// W64: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null +// GFX11: encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] + +v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 +// GFX11: 
encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, -1, exec_hi +// GFX11: encoding: 
[0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] + +v_cvt_pkrtz_f16_f32_e64 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] + +v_cvt_pkrtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_fmac_dx9_zero_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_dx9_zero_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_dx9_zero_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_fmac_dx9_zero_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 
+// GFX11: encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_fmac_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] + +v_fmac_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_fmac_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_fmac_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] + 
+v_fmac_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_fmac_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_fmac_legacy_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_legacy_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_legacy_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_fmac_legacy_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_legacy_f32_e64 v5, m0, 0.5 +// GFX11: encoding: 
[0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_legacy_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_legacy_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_legacy_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_ldexp_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] + +v_ldexp_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] + +v_ldexp_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] + +v_ldexp_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] + +v_ldexp_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] + +v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_ldexp_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] + +v_ldexp_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] + +v_ldexp_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] + +v_ldexp_f16_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] + +v_ldexp_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] + +v_ldexp_f16_e64 v5, -1, exec_hi +// GFX11: encoding: 
[0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] + +v_ldexp_f16_e64 v5, 0.5, m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] + +v_ldexp_f16_e64 v5, src_scc, vcc_lo mul:4 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] + +v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 +// GFX11: encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_lshlrev_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] + +v_lshlrev_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] + +v_lshlrev_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] + +v_lshlrev_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] + +v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] + +v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_lshlrev_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] + +v_lshlrev_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] + +v_lshlrev_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] + +v_lshlrev_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] + +v_lshlrev_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] + +v_lshlrev_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] + +v_lshlrev_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] + +v_lshlrev_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] + +v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32_e64 v5, v1, v2 +// GFX11: encoding: 
[0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] + +v_lshrrev_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] + +v_lshrrev_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] + +v_lshrrev_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] + +v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] + +v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] + +v_lshrrev_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] + +v_lshrrev_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] + +v_lshrrev_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] + +v_lshrrev_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] + +v_lshrrev_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] + +v_lshrrev_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] + +v_lshrrev_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] + +v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_max_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] + +v_max_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] + +v_max_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] + +v_max_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] + +v_max_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_f16_e64 v5, vcc_hi, 
0xfe0b +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_max_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] + +v_max_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] + +v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] + +v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_max_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] + +v_max_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] + +v_max_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] + +v_max_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] + +v_max_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_max_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] + +v_max_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: 
[0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] + +v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] + +v_max_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_max_i32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] + +v_max_i32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] + +v_max_i32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] + +v_max_i32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] + +v_max_i32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_i32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_max_i32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_i32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_i32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] + +v_max_i32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_i32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_i32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_i32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] + +v_max_i32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] + +v_max_i32_e64 v255, 0xaf123456, vcc_hi +// 
GFX11: encoding: [0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_max_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] + +v_max_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] + +v_max_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] + +v_max_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] + +v_max_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_max_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] + +v_max_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] + +v_max_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] + +v_max_u32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_min_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] + +v_min_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] + +v_min_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] + +v_min_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] + +v_min_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: 
[0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] + +v_min_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] + +v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] + +v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_min_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] + +v_min_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] + +v_min_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] + +v_min_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] + +v_min_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] + +v_min_f32_e64 v5, 
|exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] + +v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] + +v_min_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_min_i32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] + +v_min_i32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] + +v_min_i32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] + +v_min_i32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] + +v_min_i32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_i32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_i32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_i32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_i32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] + +v_min_i32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_i32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_i32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_i32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] + +v_min_i32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] + 
+v_min_i32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_min_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] + +v_min_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] + +v_min_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] + +v_min_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] + +v_min_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] + +v_min_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] + +v_min_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] + +v_min_u32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] + +v_mul_dx9_zero_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] + +v_mul_dx9_zero_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, s105, s105 +// GFX11: encoding: 
[0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_dx9_zero_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_dx9_zero_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_mul_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] + +v_mul_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] + +v_mul_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] + +v_mul_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] + 
+v_mul_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_mul_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] + +v_mul_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] + +v_mul_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] + +v_mul_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_f32_e64 v5, -1, exec_hi +// GFX11: encoding: 
[0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] + +v_mul_hi_i32_i24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] + +v_mul_hi_i32_i24_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_hi_i32_i24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_hi_i32_i24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_hi_i32_i24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + 
+v_mul_hi_u32_u24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] + +v_mul_hi_u32_u24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] + +v_mul_hi_u32_u24_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32_u24_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_hi_u32_u24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_hi_u32_u24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_hi_u32_u24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] + +v_mul_i32_i24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] + +v_mul_i32_i24_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] + +v_mul_i32_i24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] + 
+v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_i32_i24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_i32_i24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_i32_i24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_i32_i24_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_i32_i24_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_i32_i24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_i32_i24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] + +v_mul_legacy_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] + +v_mul_legacy_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] + +v_mul_legacy_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_legacy_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_legacy_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_legacy_f32_e64 v5, m0, 0.5 +// GFX11: encoding: 
[0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_legacy_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_legacy_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_legacy_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_legacy_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_legacy_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_legacy_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] + +v_mul_u32_u24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] + +v_mul_u32_u24_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] + +v_mul_u32_u24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_u32_u24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_u32_u24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_u32_u24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_u32_u24_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_u32_u24_e64 v5, -1, exec_hi +// 
GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_u32_u24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_u32_u24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_or_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] + +v_or_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] + +v_or_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] + +v_or_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] + +v_or_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] + +v_or_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_or_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] + +v_or_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] + +v_or_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] + +v_or_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] + +v_or_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] + +v_or_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] + +v_or_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] + +v_or_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] + +v_or_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 +// W32: encoding: 
[0x05,0x06,0x21,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, v255, src_scc, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, s105, s105, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, m0, 0.5, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s105, null, exec_hi, s105 +// W32: encoding: [0x05,0x69,0x21,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo +// W32: encoding: [0x05,0x6a,0x21,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_sub_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi +// W32: encoding: [0x05,0x6b,0x21,0xd5,0xf0,0xd4,0xac,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 +// W32: encoding: [0x05,0x7b,0x21,0xd5,0xfd,0xf8,0xec,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] +// W64: encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc +// W64: encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] +// W64: encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sub_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] + +v_sub_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] + +v_sub_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] + +v_sub_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] + +v_sub_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] + +v_sub_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] + +v_sub_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] + +v_sub_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: 
[0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] + +v_sub_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] + +v_sub_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] + +v_sub_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] + +v_sub_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] + +v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] + +v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_sub_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] + +v_sub_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] + +v_sub_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] + +v_sub_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] + +v_sub_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] + +v_sub_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_sub_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] + +v_sub_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] + +v_sub_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] + +v_sub_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] + +v_sub_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] + +v_sub_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] + +v_sub_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] + +v_sub_f32_e64 v5, -src_scc, |vcc_lo| 
mul:4 +// GFX11: encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] + +v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] + +v_sub_nc_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] + +v_sub_nc_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] + +v_sub_nc_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] + +v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] + +v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] + +v_sub_nc_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] + +v_sub_nc_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] + +v_sub_nc_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] + +v_sub_nc_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] + +v_sub_nc_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] + +v_sub_nc_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] + +v_sub_nc_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] + +v_sub_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_subrev_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, 
v255, src_scc, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, s105, s105, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, m0, 0.5, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s105, null, exec_hi, s105 +// W32: encoding: [0x05,0x69,0x22,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo +// W32: encoding: [0x05,0x6a,0x22,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi +// W32: encoding: [0x05,0x6b,0x22,0xd5,0xf0,0xd4,0xac,0x01] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 +// W32: encoding: [0x05,0x7b,0x22,0xd5,0xfd,0xf8,0xec,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] +// W64: encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc +// W64: encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] +// W64: encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_subrev_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] + +v_subrev_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] + +v_subrev_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] + +v_subrev_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] + +v_subrev_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] + +v_subrev_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_subrev_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] + +v_subrev_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] + +v_subrev_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] + +v_subrev_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: 
[0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] + +v_subrev_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] + +v_subrev_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] + +v_subrev_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] + +v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] + +v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_subrev_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] + +v_subrev_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] + +v_subrev_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] + +v_subrev_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] + +v_subrev_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] + +v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_subrev_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] + +v_subrev_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] + +v_subrev_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] + +v_subrev_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] + +v_subrev_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] + +v_subrev_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] + +v_subrev_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] + +v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: 
[0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] + +v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] + +v_subrev_nc_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] + +v_subrev_nc_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] + +v_subrev_nc_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] + +v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] + +v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] + +v_subrev_nc_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] + +v_subrev_nc_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] + +v_subrev_nc_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] + +v_subrev_nc_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] + +v_subrev_nc_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] + +v_subrev_nc_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] + +v_subrev_nc_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] + +v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xnor_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] + +v_xnor_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] + +v_xnor_b32_e64 v5, s1, s2 +// 
GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] + +v_xnor_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] + +v_xnor_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] + +v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_xnor_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] + +v_xnor_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] + +v_xnor_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] + +v_xnor_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] + +v_xnor_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] + +v_xnor_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] + +v_xnor_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] + +v_xnor_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] + +v_xnor_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xor_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] + +v_xor_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] + +v_xor_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] + +v_xor_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] + +v_xor_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] + +v_xor_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_xor_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] + +v_xor_b32_e64 v5, 
m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] + +v_xor_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] + +v_xor_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] + +v_xor_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] + +v_xor_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] + +v_xor_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] + +v_xor_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] + +v_xor_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 956bb2d332669..97b7cc254fc81 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1,5 +1,5 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s # GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00 @@ -46,61 +46,6 @@ # GFX11: v_add3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# W32: v_add_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_add_co_ci_u32_e64 v5, 
s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf - -# W32: v_add_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] -0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] -0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] 
-0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01 - -# W32: v_add_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] -# W64: v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] -0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01 - -# W32: v_add_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] -# W64: v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] -0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01 - -# GFX11: v_add_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - # W32: v_add_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] # W64: v_add_co_u32 v5, s[12:13], v1, v2 ; encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] 0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00 @@ -160,96 +105,6 @@ # GFX11: v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_add_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_add_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_add_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_add_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_add_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_add_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] 
-0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_add_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_add_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_add_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_add_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_add_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_add_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_add_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: 
[0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00 @@ -466,51 +321,6 @@ # GFX11: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_add_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_add_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_add_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_add_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_add_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_add_nc_u32_e64 v5, exec_lo, -1 ; 
encoding: [0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_add_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00 @@ -646,51 +456,6 @@ # GFX11: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_and_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_and_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_and_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00 - 
-# GFX11: v_and_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_and_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_and_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_and_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_and_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_and_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_and_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00 @@ -781,51 +546,6 @@ # GFX11: v_ashrrev_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_ashrrev_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_ashrrev_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
-0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_ashrrev_i64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00 @@ -1390,61 +1110,6 @@ # GFX11: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# W32: v_cndmask_b32_e64 v5, v1, 0xaf123456, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf - -# W32: v_cndmask_b32_e64 v5, v255, src_scc, s6 ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00 - -# W32: v_cndmask_b32_e64 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] -0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, vcc_lo, v2, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00 - -# W32: v_cndmask_b32_e64 v5, vcc_hi, v255, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_cndmask_b32_e64 v5, vcc_hi, v255, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00 - -# W32: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] -0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, m0, 
0.5, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00 - -# W32: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] -0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, exec_hi, -1, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00 - -# W32: v_cndmask_b32_e64 v5, null, exec_hi, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, -1, m0, s104 ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_cndmask_b32_e64 v5, -1, m0, s[104:105] ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01 - -# W32: v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] -# W64: v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] -0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41 - -# W32: v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] -# W64: v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] -0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21 - -# GFX11: v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] -0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf - # GFX11: v_cos_f16_e64 v5, v1 ; encoding: 
[0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00 @@ -2957,51 +2622,6 @@ # GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] 0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: 
v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf - # GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 @@ -4181,141 +3801,6 @@ # GFX11: v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] 0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf -# GFX11: v_fmac_dx9_zero_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, ttmp15, src_scc ; encoding: 
[0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - -# GFX11: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] 
-0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_fmac_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_fmac_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, s105, s105 ; 
encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_fmac_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_fmac_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_fmac_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_fmac_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_fract_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00 @@ -4694,51 +4179,6 @@ # GFX11: v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: 
[0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] 0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf -# GFX11: v_ldexp_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_ldexp_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_ldexp_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_ldexp_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_ldexp_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_ldexp_f16_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] -0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08 - -# GFX11: v_ldexp_f16_e64 v5, 
src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] -0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10 - -# GFX11: v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00 - # GFX11: v_ldexp_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00 @@ -5099,51 +4539,6 @@ # GFX11: v_lshlrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_lshlrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_lshlrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, exec_hi, null ; 
encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_lshlrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00 @@ -5216,51 +4611,6 @@ # GFX11: v_lshrrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_lshrrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: 
[0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_lshrrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_lshrrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00 @@ -5946,96 +5296,6 @@ # GFX11: v_max3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_max_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_f16_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# 
GFX11: v_max_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_max_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_max_f32_e64 
v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00 @@ -6117,51 +5377,6 @@ # GFX11: v_max_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_max_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_max_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, null, exec_lo ; encoding: 
[0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_max_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_max_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 @@ -6207,53 +5422,8 @@ # GFX11: v_max_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_max_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_max_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] 
-0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_max_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01 @@ -7062,96 +6232,6 @@ # GFX11: v_min3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_min_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_f16_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_min_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# 
GFX11: v_min_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_min_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_min_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_min_f32_e64 
v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00 @@ -7233,51 +6313,6 @@ # GFX11: v_min_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_min_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_min_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, null, exec_lo ; encoding: 
[0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_min_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_min_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 @@ -7323,51 +6358,6 @@ # GFX11: v_min_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_min_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] 
-0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_min_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_min_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00 @@ -7791,141 +6781,6 @@ # GFX11: v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_mul_dx9_zero_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, 
s105, s105 ; encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: 
v_mul_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_mul_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] 
-0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] 
-0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00 @@ -8007,51 +6862,6 @@ # GFX11: v_mul_hi_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, exec_hi, null ; encoding: 
[0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_mul_hi_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00 @@ -8097,96 +6907,6 @@ # GFX11: v_mul_hi_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_u32_u24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: 
[0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_hi_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] 
-0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 @@ -8277,51 +6997,6 @@ # GFX11: v_mul_lo_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_mul_u32_u24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: 
[0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_mullit_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00 @@ -8550,51 +7225,6 @@ # GFX11: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_or_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_or_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_or_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_or_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_or_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_or_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_or_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] 
-0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_or_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00 @@ -9696,61 +8326,6 @@ # GFX11: v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] 0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf -# W32: v_sub_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf - -# W32: v_sub_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] -0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00 - -# W32: 
v_sub_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] -0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] -0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00 - -# W32: v_sub_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_sub_co_ci_u32_e64 
v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01 - -# W32: v_sub_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] -# W64: v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] -0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01 - -# W32: v_sub_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] -# W64: v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] -0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01 - -# GFX11: v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - # W32: v_sub_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] # W64: v_sub_co_u32 v5, s[12:13], v1, v2 ; encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] 0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00 @@ -9810,96 +8385,6 @@ # GFX11: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_sub_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] 
-0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_sub_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_sub_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_sub_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, s105, s105 ; encoding: 
[0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_sub_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_sub_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_sub_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_sub_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_sub_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00 @@ -10035,106 +8520,6 @@ # GFX11: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: 
[0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_sub_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_sub_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: 
v_sub_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - -# W32: v_subrev_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf - -# W32: v_subrev_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] -0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] 
-0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] -0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01 - -# W32: v_subrev_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] -# W64: v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] -0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01 - -# W32: v_subrev_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] -# W64: v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] -0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01 - -# GFX11: 
v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - # W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] # W64: v_subrev_co_u32 v5, s[12:13], v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] 0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00 @@ -10194,141 +8579,6 @@ # GFX11: v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_subrev_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_subrev_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_subrev_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_subrev_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_subrev_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_subrev_f16_e64 v5, |exec_hi|, null ; 
encoding: [0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_subrev_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_subrev_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_subrev_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_subrev_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] 
-0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_subrev_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_subrev_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - -# GFX11: v_subrev_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] 
-0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_subrev_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_trig_preop_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00 @@ -10584,51 +8834,6 @@ # GFX11: v_xad_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_xnor_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_xnor_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] 
-0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_xnor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_xnor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_xnor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_xnor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_xnor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_xor3_b32 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00 @@ -10718,48 +8923,3 @@ # GFX11: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_xor_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_xor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_xor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_xor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_xor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_xor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, 
0.5, m0 ; encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_xor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 2f9f85f130236..65631e3b90929 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -1,5 +1,5 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s # GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -43,61 +43,6 @@ # GFX11: v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 - -# W32: v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 - -# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 - # W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -153,90 +98,6 @@ # GFX11: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 
fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -321,48 +182,6 @@ # GFX11: v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, 
v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -489,48 +308,6 @@ # GFX11: 
v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -615,48 +392,6 @@ # GFX11: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: 
v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, 
v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -1138,67 +873,6 @@ # GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 - -# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30] -0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30 - -# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30] -0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30 - # GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff @@ -2333,48 +2007,6 @@ # GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, 
v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - # GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -3173,48 +2805,6 @@ # GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 - # GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -3509,48 +3099,6 @@ # GFX11: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: 
v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_lshlrev_b32_e64_dpp v255, v255, 
v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -3593,48 +3141,6 @@ # GFX11: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -3845,90 +3351,6 @@ # GFX11: v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -3971,48 +3393,6 @@ # GFX11: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, 
v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; 
encoding: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -4055,48 +3435,6 @@ # GFX11: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -4601,90 +3939,6 @@ # GFX11: v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -4727,48 +3981,6 @@ # GFX11: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 
row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -4811,50 +4023,8 @@ # GFX11: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -5273,258 +4443,6 @@ # GFX11: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: 
v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 
-0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# 
GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, 
v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -5567,48 +4485,6 @@ # GFX11: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
-0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 
-0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -5819,48 +4695,6 @@ # GFX11: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -6575,61 +5409,6 @@ # GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff - -# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 - -# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 - -# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 - # W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -6685,90 +5464,6 @@ # GFX11: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -6811,103 +5506,6 @@ # GFX11: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_nc_u32_e64_dpp v5, 
v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, 
s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] 
-0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 - # W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -6963,132 +5561,6 @@ # GFX11: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: 
v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: 
v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff @@ -7215,48 +5687,6 @@ # GFX11: v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -7341,48 +5771,6 @@ # GFX11: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -7635,90 +6023,6 @@ # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fmac_f32_e64_dpp 
v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop2.txt new file mode 100644 index 0000000000000..69a7122e43831 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop2.txt @@ -0,0 +1,1698 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: 
v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30] +0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30 + +# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30] +0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# 
GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_lshlrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 
+0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 
+0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: 
v_mul_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, 
v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, 
v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + 
+# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 
+0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] 
+0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] 
+0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: 
v_xnor_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 525940b74f73e..48eccc0b1fcc0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -1,5 +1,5 @@ # RUN: llvm-mc 
-arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s # GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -37,25 +37,6 @@ # GFX11: v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x55,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x55,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: 
v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] -0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - # W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -75,30 +56,6 @@ # GFX11: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x00,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xfc,0x00,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] 
-0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -141,12 +98,6 @@ # GFX11: v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x26,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -219,12 +170,6 @@ # GFX11: 
v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -267,12 +212,6 @@ # GFX11: v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x3a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -460,31 +399,6 @@ # GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] 0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] -0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - -# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] -0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00 - -# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] -0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00 - # 
GFX11: v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 @@ -851,18 +765,6 @@ # GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x24,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 - # GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -1103,18 +1005,6 @@ # GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -1265,24 +1155,12 @@ # GFX11: v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x38,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x39,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
-0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1463,54 +1341,18 @@ # GFX11: v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x1e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f32_e64_dpp 
v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x09,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 
-0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1883,54 +1725,18 @@ # GFX11: v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x1b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f32_e64_dpp v255, 
-|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0c,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0b,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2141,72 +1947,12 @@ # GFX11: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x80,0x39,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] 
-0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_i32_i24_e64_dpp v255, v255, 
v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x05,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2297,12 +2043,6 @@ # GFX11: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2621,25 +2361,6 @@ # GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] 
-0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - # W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -2659,61 +2380,12 @@ # GFX11: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x01,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xfc,0x01,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x25,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, 
v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] -0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - # W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -2733,36 +2405,6 @@ # GFX11: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xfc,0x02,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f16_e64_dpp v5, 
-v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 @@ -2823,12 +2465,6 @@ # GFX11: v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x00,0x45,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x45,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2871,12 +2507,6 @@ # GFX11: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -3015,30 +2645,6 @@ # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
-0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt new file mode 100644 index 0000000000000..f0165884ba71f --- /dev/null +++ 
b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt @@ -0,0 +1,396 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, 
v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] +0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00 + +# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] +0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 
+0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 
mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] 
+0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 
+0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f16_e64_dpp v255, 
-|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f32_e64_dpp 
v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, 
vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt new file mode 100644 index 
0000000000000..ae577541bb205 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt @@ -0,0 +1,1842 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s + +# W32: v_add_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_add_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] +0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: 
[0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: v_add_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] +# W64: v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] +0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01 + +# W32: v_add_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] +# W64: v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] +0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01 + +# GFX11: v_add_co_ci_u32_e64 
v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf + +# GFX11: v_add_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_add_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_add_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_add_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_add_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_add_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] 
+0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_add_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_add_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_add_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_add_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_add_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_add_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_add_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, -1, exec_hi ; encoding: 
[0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_add_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_add_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_add_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_add_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_add_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_add_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] 
+0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_and_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_and_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_and_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_and_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_and_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_and_b32_e64 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_and_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_and_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_ashrrev_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_ashrrev_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: 
v_ashrrev_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# W32: v_cndmask_b32_e64 v5, v1, 0xaf123456, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_cndmask_b32_e64 v5, v255, src_scc, s6 ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] +0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_cndmask_b32_e64 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00 + +# W32: 
v_cndmask_b32_e64 v5, vcc_lo, v2, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_cndmask_b32_e64 v5, vcc_hi, v255, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_cndmask_b32_e64 v5, vcc_hi, v255, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_cndmask_b32_e64 v5, m0, 0.5, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] +0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_cndmask_b32_e64 v5, exec_hi, -1, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_cndmask_b32_e64 v5, null, exec_hi, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00 + +# W32: v_cndmask_b32_e64 v5, -1, m0, s104 ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_cndmask_b32_e64 v5, -1, m0, s[104:105] ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: 
v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] +# W64: v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] +0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41 + +# W32: v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] +# W64: v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] +0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21 + +# GFX11: v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] +0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_dx9_zero_f32_e64 
v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: 
[0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_fmac_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_fmac_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00 + +# 
GFX11: v_fmac_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_fmac_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_fmac_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_fmac_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_ldexp_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f16_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_ldexp_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_ldexp_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_ldexp_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_ldexp_f16_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] +0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08 + +# GFX11: v_ldexp_f16_e64 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] +0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10 + +# GFX11: v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] 
+0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_lshlrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, src_scc, vcc_lo ; encoding: 
[0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_lshrrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_lshrrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] 
+0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_max_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_max_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, null, exec_lo ; encoding: 
[0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_max_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: 
v_max_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_max_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_max_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_max_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] 
+0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_max_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_max_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_max_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_u32_e64 v5, ttmp15, src_scc ; encoding: 
[0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_max_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_max_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: 
[0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: 
v_min_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_min_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_min_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] 
+0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_min_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_u32_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_min_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_dx9_zero_f32_e64 v5, 
v1, v2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: 
[0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: 
v_mul_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_mul_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] 
+0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] 
+0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_u32_u24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] 
+0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; 
encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_u32_u24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] 
+0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_or_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_or_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_or_b32_e64 v5, s1, 
s2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_or_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_or_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_or_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_or_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_or_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# W32: v_sub_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_sub_co_ci_u32_e64 v5, 
s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_sub_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] +0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] 
+0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_sub_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: v_sub_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] +# W64: v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] +0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01 + +# W32: v_sub_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] +# W64: v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] +0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01 + +# GFX11: v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf + +# GFX11: v_sub_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_sub_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] 
+0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_sub_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_sub_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_sub_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_f32_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_sub_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_sub_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_sub_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_sub_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_sub_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_sub_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + 
+# GFX11: v_sub_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_sub_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00 
+ +# GFX11: v_sub_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# W32: v_subrev_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_subrev_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] +0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: 
[0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: v_subrev_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] +# W64: v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] +0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01 + +# W32: v_subrev_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] +# W64: v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] +0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01 + +# GFX11: v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: 
[0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_subrev_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_subrev_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_subrev_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_subrev_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_subrev_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: 
v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_subrev_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_subrev_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_subrev_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_subrev_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_subrev_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, -1, exec_hi ; encoding: 
[0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, exec_hi, null ; encoding: 
[0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xnor_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_xnor_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_xnor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xnor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_xnor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00 + 
+# GFX11: v_xnor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_xnor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xor_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_xor_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_xor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] 
+0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_xor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_xor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_xor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_xor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf From 093200fd00ff5d3a615410ce66d666467629a10b Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 1 Nov 2022 16:32:55 +0100 Subject: [PATCH 013/516] [AMDGPU][NFC] Split MC tests into promoted from VOP1 to VOP3 and only VOP3 Differential Revision: https://reviews.llvm.org/D136149 --- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 9241 +++++------------ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 3082 +----- .../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s | 2815 +++++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 717 -- .../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s | 718 ++ .../test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s | 3508 +++++++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 8602 +++++---------- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 3116 +----- .../gfx11_dasm_vop3_dpp16_from_vop1.txt | 2605 +++++ 
.../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 600 -- .../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 601 ++ .../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 3283 ++++++ 12 files changed, 19447 insertions(+), 19441 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index d7908d75ecf91..693e12fd01b6d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -856,267 +856,6 @@ v_bfm_b32 v5, src_scc, vcc_lo v_bfm_b32 v255, 0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_bfrev_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] - -v_bfrev_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] - -v_bfrev_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] - -v_bfrev_b32_e64 
v5, null -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] - -v_bfrev_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_ceil_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] - -v_ceil_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] - -v_ceil_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] - -v_ceil_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] - -v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_ceil_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] - -v_ceil_f32_e64 v5, v255 -// GFX11: 
encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] - -v_ceil_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] - -v_ceil_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] - -v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_ceil_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] - -v_ceil_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] - -v_ceil_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], null -// 
GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] - -v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] - -v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_cls_i32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] - -v_cls_i32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] - -v_cls_i32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] - -v_cls_i32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] - -v_cls_i32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] - -v_cls_i32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] - -v_cls_i32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] - -v_cls_i32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] - -v_cls_i32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] - -v_cls_i32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] - -v_cls_i32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] - -v_cls_i32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] - -v_cls_i32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] - -v_cls_i32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] - -v_cls_i32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_clz_i32_u32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] - -v_clz_i32_u32_e64 v5, v255 -// GFX11: 
encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] - -v_clz_i32_u32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - v_cndmask_b16 v5, v1, src_scc, s3 // W32: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1216,141 +955,6 @@ v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null // GFX11: encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cos_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] - -v_cos_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] - -v_cos_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] - -v_cos_f16_e64 v5, s105 -// GFX11: encoding: 
[0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] - -v_cos_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] - -v_cos_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] - -v_cos_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] - -v_cos_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] - -v_cos_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] - -v_cos_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] - -v_cos_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] - -v_cos_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] - -v_cos_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] - -v_cos_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] - -v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_cos_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] - -v_cos_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] - -v_cos_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] - -v_cos_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] - -v_cos_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] - -v_cos_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] - -v_cos_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] - -v_cos_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] - -v_cos_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] - -v_cos_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] - -v_cos_f32_e64 v5, 
null -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] - -v_cos_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] - -v_cos_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] - -v_cos_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] - -v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_ctz_i32_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] - -v_ctz_i32_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] - -v_ctz_i32_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - v_cubeid_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] 
@@ -1531,6983 +1135,4259 @@ v_cubetc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX11: encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_f16_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_i16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f16_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_i16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f16_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f16_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f16_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f16_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f16_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f16_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f16_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f16_f32_e64 v5, exec_hi -// GFX11: encoding: 
[0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f16_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f16_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f16_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_i16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f16_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f16_i16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_i16_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f16_i16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_i16_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f16_i16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f16_i16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f16_i16_e64 v5, vcc_lo -// GFX11: encoding: 
[0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f16_i16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f16_i16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f16_i16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f16_i16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f16_i16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f16_i16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f16_i16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f16_i16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] +v_cvt_pk_i16_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] -v_cvt_f16_i16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_i16_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] -v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 -// GFX11: encoding: 
[0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f16_u16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f16_u16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f16_u16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f16_u16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f16_u16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f16_u16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f16_u16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f16_u16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f16_u16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f16_u16_e64 v5, exec_hi -// GFX11: encoding: 
[0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f16_u16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f16_u16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f16_u16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f16_u16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2 -// GFX11: encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, s105, s105 +// GFX11: encoding: 
[0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f32_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] 
+// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_u16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] +v_cvt_pk_u16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_f64_e64 v5, s[104:105] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_f64_e64 v5, exec -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, 
exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_f64_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_u16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] +v_cvt_pk_u16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pk_u16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_i32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_u16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_i32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_i32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f32_i32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_i32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_i32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_i32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, s105, s105 +// GFX11: 
encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_i32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_i32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_i32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_i32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_i32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_i32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_u16_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_i32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_u16_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pk_u16_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_u32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_u16_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] -v_cvt_f32_u32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_u16_u32 v5, src_scc, vcc_lo +// GFX11: encoding: 
[0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] -v_cvt_f32_u32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_u32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_f32_u32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_f32_u32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_f32_u32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_f32_u32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_f32_u32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_f32_u32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_f32_u32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_f32_u32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_f32_u32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] -v_cvt_f32_u32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] -v_cvt_f32_ubyte0_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] -v_cvt_f32_ubyte0_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] -v_cvt_f32_ubyte0_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null +// GFX11: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte0_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_ubyte0_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte0_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] 
+v_cvt_pknorm_i16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte0_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte0_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_i16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_i16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_ubyte0_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_i16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_ubyte1_e64 v5, v255 -// GFX11: 
encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_ubyte1_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_ubyte1_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte1_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte1_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte1_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, m0, 0.5 +// GFX11: encoding: 
[0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte1_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_i16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_i16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_i16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_ubyte2_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_ubyte2_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte2_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_ubyte2_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte2_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] 
+v_cvt_pknorm_u16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte2_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte2_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_u16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_u16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_u16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, v1 -// GFX11: encoding: 
[0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_ubyte3_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_ubyte3_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_ubyte3_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte3_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte3_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc +// GFX11: encoding: 
[0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte3_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte3_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_u16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_u16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_u16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f64_f32_e64 v[5:6], v1 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f64_f32_e64 v[5:6], v255 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f64_f32_e64 v[5:6], s1 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f64_f32_e64 v[5:6], s105 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] +v_div_fixup_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_f64_f32_e64 v[5:6], vcc_lo -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] +v_div_fixup_f16 
v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_f64_f32_e64 v[5:6], vcc_hi -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] +v_div_fixup_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_f64_f32_e64 v[5:6], ttmp15 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] +v_div_fixup_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_f64_f32_e64 v[5:6], m0 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] +v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_f64_f32_e64 v[5:6], exec_lo -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] +v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_cvt_f64_f32_e64 v[5:6], exec_hi -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] +v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_f64_f32_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] +v_div_fixup_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_f64_f32_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] +v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] +v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] +v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 
-v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_f64_i32_e64 v[5:6], v1 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] +v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] -v_cvt_f64_i32_e64 v[5:6], v255 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] +v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] -v_cvt_f64_i32_e64 v[5:6], s1 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] +v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cvt_f64_i32_e64 v[5:6], s105 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] +v_div_fixup_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_f64_i32_e64 v[5:6], vcc_lo -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] +v_div_fixup_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_f64_i32_e64 v[5:6], vcc_hi -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] +v_div_fixup_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_f64_i32_e64 v[5:6], ttmp15 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] +v_div_fixup_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_f64_i32_e64 v[5:6], m0 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] +v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_f64_i32_e64 v[5:6], exec_lo -// GFX11: encoding: 
[0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] +v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_f64_i32_e64 v[5:6], exec_hi -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] +v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_f64_i32_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] +v_div_fixup_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_f64_i32_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] +v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] +v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] +v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_f64_u32_e64 v[5:6], v1 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] +v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] -v_cvt_f64_u32_e64 v[5:6], v255 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] +v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] -v_cvt_f64_u32_e64 v[5:6], s1 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] +v_div_fixup_f32 v255, 
-|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_f64_u32_e64 v[5:6], s105 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] -v_cvt_f64_u32_e64 v[5:6], vcc_lo -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] -v_cvt_f64_u32_e64 v[5:6], vcc_hi -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] -v_cvt_f64_u32_e64 v[5:6], ttmp15 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| +// GFX11: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] -v_cvt_f64_u32_e64 v[5:6], m0 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| +// GFX11: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] -v_cvt_f64_u32_e64 v[5:6], exec_lo -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null +// GFX11: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -v_cvt_f64_u32_e64 v[5:6], exec_hi -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| +// GFX11: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] -v_cvt_f64_u32_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], null, 0.5, vcc +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] -v_cvt_f64_u32_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 +// 
GFX11: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] +v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 +// GFX11: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] -v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] +v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 +// GFX11: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] -v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -v_cvt_floor_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] +v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01] -v_cvt_floor_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] +v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15 +// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01] -v_cvt_floor_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] +v_div_fmas_f32 v5, -|m0|, -|v255|, v3 +// W32: encoding: [0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64] -v_cvt_floor_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] +v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo| +// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1] -v_cvt_floor_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] +v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255| +// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7] -v_cvt_floor_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] +v_div_fmas_f32 v5, null, exec_hi, -|exec_hi| +// W32: encoding: 
[0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81] -v_cvt_floor_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] +v_div_fmas_f32 v5, -1, -|m0|, -|m0| +// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1] -v_cvt_floor_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] +v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2 +// W32: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b] -v_cvt_floor_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] +v_div_fmas_f32 v5, vcc_lo, v2, v3 +// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] -v_cvt_floor_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] +v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi +// W64: encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] -v_cvt_floor_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] +v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 +// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] -v_cvt_floor_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] +v_div_fmas_f32 v5, m0, 0.5, v255 +// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] -v_cvt_floor_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] +v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| +// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] -v_cvt_floor_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] +v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| +// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] -v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +v_div_fmas_f32 v5, null, m0, -|m0| +// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] -v_cvt_flr_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] +v_div_fmas_f32 v5, 
-1, -|vcc_lo|, -|vcc_lo| +// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] -v_cvt_flr_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] +v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 +// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] -v_cvt_flr_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] +v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_cvt_flr_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] +v_div_fmas_f32 v5, v255, src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] -v_cvt_flr_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] +v_div_fmas_f32 v5, s105, s105, s105 +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] -v_cvt_flr_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] +v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] -v_cvt_flr_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] +v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 +// GFX11: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] -v_cvt_flr_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] +v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_cvt_flr_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] +v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] +// GFX11: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] -v_cvt_flr_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] +v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] +// GFX11: encoding: 
[0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] -v_cvt_flr_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] +v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| +// GFX11: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] -v_cvt_flr_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] +v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]| +// GFX11: encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] -v_cvt_flr_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] +v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null +// GFX11: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] -v_cvt_flr_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] +v_div_fmas_f64 v[5:6], null, 0.5, -src_scc +// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] -v_cvt_flr_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +v_div_fmas_f64 v[5:6], -1, -exec, |exec| +// GFX11: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] -v_cvt_i16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] +v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 +// GFX11: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] -v_cvt_i16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] +v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4 +// GFX11: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] -v_cvt_i16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] +v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] -v_cvt_i16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, v1, v2, s3 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, v255, s2, s105 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction -v_cvt_i16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp -// GFX11: encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] +v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 +// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, v1, v2, s3 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, v255, s2, s105 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, s1, v255, exec_hi +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, s105, s105, exec_lo +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction -v_cvt_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, m0, 0.5, m0 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp -// GFX11: encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] +v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] +v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] +v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 +// W64: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, s[104:105] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, exec -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null +// W32: 
encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v5, -|src_scc| -// GFX11: encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] +v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_f64_e64 v255, 0xaf123456 clamp -// GFX11: encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] +v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 +// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, s105 -// GFX11: encoding: 
[0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
-v_cvt_i32_i16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_i32_i16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 +// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_nearest_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] +v_dot2_bf16_bf16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_nearest_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] +v_dot2_bf16_bf16 v5, v255, v255, s105 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] -v_cvt_nearest_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] 
+v_dot2_bf16_bf16 v5, s1, s2, v3 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] -v_cvt_nearest_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, s105, s105, m0 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] -v_cvt_nearest_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -v_cvt_nearest_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -v_cvt_nearest_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_nearest_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo +// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] -v_cvt_nearest_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| +// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] -v_cvt_nearest_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| +// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] -v_cvt_nearest_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| +// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] -v_cvt_nearest_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0] +// GFX11: encoding: 
[0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] -v_cvt_nearest_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] -v_cvt_nearest_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] +v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cvt_nearest_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +v_dot2_f16_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_norm_i16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] +v_dot2_f16_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_norm_i16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] +v_dot2_f16_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_norm_i16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] +v_dot2_f16_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_norm_i16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] +v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_norm_i16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] +v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_cvt_norm_i16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_norm_i16_f16_e64 v5, ttmp15 
-// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] +v_dot2_f16_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_norm_i16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] +v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_norm_i16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_norm_i16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] +v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| +// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_cvt_norm_i16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_norm_i16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] +v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] -v_cvt_norm_i16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] -v_cvt_norm_i16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] +v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| -// GFX11: encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +v_fma_dx9_zero_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_norm_u16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] 
+v_fma_dx9_zero_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_norm_u16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] +v_fma_dx9_zero_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_norm_u16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_norm_u16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_norm_u16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_norm_u16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_norm_u16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_norm_u16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_norm_u16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_norm_u16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: 
[0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_cvt_norm_u16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_norm_u16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] -v_cvt_norm_u16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] -v_cvt_norm_u16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| -// GFX11: encoding: [0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +v_fma_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_off_f32_i4_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] +v_fma_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_off_f32_i4_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] +v_fma_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_off_f32_i4_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] +v_fma_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_off_f32_i4_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] +v_fma_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_off_f32_i4_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] 
+v_fma_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_cvt_off_f32_i4_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] +v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_off_f32_i4_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] +v_fma_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_off_f32_i4_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] +v_fma_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_off_f32_i4_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] +v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_off_f32_i4_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] +v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_cvt_off_f32_i4_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] +v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_off_f32_i4_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] +v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] -v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] +v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] -v_cvt_off_f32_i4_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] +v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: 
[0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 -// GFX11: encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] +v_fma_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_pk_i16_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] +v_fma_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_pk_i16_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] +v_fma_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_pk_i16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] +v_fma_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_pk_i16_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] +v_fma_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] +v_fma_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_pk_i16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] +v_fma_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_pk_i16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] +v_fma_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_pk_i16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] +v_fma_f32 v5, 
-|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_pk_i16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] +v_fma_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_cvt_pk_i16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] +v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_pk_i16_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] +v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] -v_cvt_pk_i16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] +v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] -v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] +v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] -v_cvt_pk_i16_i32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] +v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] -v_cvt_pk_i16_i32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] +v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] -v_cvt_pk_i16_i32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] +v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| +// GFX11: encoding: 
[0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] -v_cvt_pk_i16_i32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] +v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| +// GFX11: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] -v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] +v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null +// GFX11: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| +// GFX11: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] -v_cvt_pk_i16_i32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] +v_fma_f64 v[5:6], null, 0.5, vcc +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] -v_cvt_pk_i16_i32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] +v_fma_f64 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_cvt_pk_i16_i32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] +v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 +// GFX11: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] -v_cvt_pk_i16_i32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] +v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 +// GFX11: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] -v_cvt_pk_i16_i32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] +v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -v_cvt_pk_i16_i32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] +v_fma_legacy_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] 
-v_cvt_pk_i16_i32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] +v_fma_legacy_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_pk_i16_i32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] +v_fma_legacy_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_fma_legacy_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_pk_norm_i16_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_fma_legacy_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_pk_norm_i16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +v_fma_legacy_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_i16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] +v_fma_legacy_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_pk_norm_i16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] +v_fma_legacy_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] +v_fma_legacy_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_fma_legacy_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc -// GFX11: encoding: 
[0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] +v_fma_legacy_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_i16_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] +v_fma_legacy_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] +v_fma_legacy_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] -v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] +v_fma_legacy_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] -v_cvt_pk_norm_i16_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] +v_fma_legacy_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_i16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] +v_ldexp_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] +v_ldexp_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00] -v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] +v_ldexp_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00] -v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_ldexp_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, v1, v2 -// GFX11: encoding: 
[0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_ldexp_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +v_ldexp_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_u16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] +v_ldexp_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_pk_norm_u16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] +v_ldexp_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] +v_ldexp_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_ldexp_f32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] +v_ldexp_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] +v_ldexp_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] +v_ldexp_f32 v5, 0.5, m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08] -v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] +v_ldexp_f32 v5, src_scc, vcc_lo mul:4 +// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10] -v_cvt_pk_norm_u16_f16 v5, null, exec_lo -// 
GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] +v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 +// GFX11: encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_u16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] +v_ldexp_f64 v[5:6], v[1:2], v2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] +v_ldexp_f64 v[5:6], v[1:2], v255 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00] -v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] +v_ldexp_f64 v[5:6], v[1:2], s2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00] -v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_ldexp_f64 v[5:6], v[1:2], s105 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00] -v_cvt_pk_u16_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] +v_ldexp_f64 v[5:6], v[254:255], ttmp15 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00] -v_cvt_pk_u16_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] +v_ldexp_f64 v[5:6], s[2:3], vcc_hi +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00] -v_cvt_pk_u16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] +v_ldexp_f64 v[5:6], s[104:105], vcc_lo +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00] -v_cvt_pk_u16_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] +v_ldexp_f64 v[5:6], vcc, m0 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00] -v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] +v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi +// GFX11: 
encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00] -v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_ldexp_f64 v[5:6], exec, exec_lo +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00] -v_cvt_pk_u16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] +v_ldexp_f64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00] -v_cvt_pk_u16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] +v_ldexp_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00] -v_cvt_pk_u16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] +v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08] -v_cvt_pk_u16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] +v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4 +// GFX11: encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30] -v_cvt_pk_u16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] +v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] -v_cvt_pk_u16_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] +v_lerp_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_pk_u16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] +v_lerp_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] +v_lerp_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: 
[0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +v_lerp_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_pk_u16_u32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] +v_lerp_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_pk_u16_u32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] +v_lerp_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_pk_u16_u32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] +v_lerp_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_pk_u16_u32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] +v_lerp_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] +v_lerp_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_lerp_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01] -v_cvt_pk_u16_u32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] +v_lerp_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_cvt_pk_u16_u32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] +v_lerp_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03] -v_cvt_pk_u16_u32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] +v_lerp_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03] -v_cvt_pk_u16_u32 v5, 
exec_hi, null -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] +v_lerp_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03] -v_cvt_pk_u16_u32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] +v_lerp_u8 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_cvt_pk_u16_u32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] +v_lshl_add_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_pk_u16_u32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] +v_lshl_add_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_pk_u16_u32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] +v_lshl_add_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_lshl_add_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_pk_u8_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] +v_lshl_add_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_pk_u8_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] +v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_pk_u8_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] +v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_pk_u8_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] +v_lshl_add_u32 v5, m0, 0.5, m0 +// GFX11: 
encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] +v_lshl_add_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_lshl_add_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01] -v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] +v_lshl_add_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_cvt_pk_u8_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] +v_lshl_add_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03] -v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] +v_lshl_add_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03] -v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] +v_lshl_add_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03] -v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] +v_lshl_or_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] +v_lshl_or_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01] 
-v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] +v_lshl_or_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null -// GFX11: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] +v_lshl_or_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_pknorm_i16_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_lshl_or_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_pknorm_i16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +v_lshl_or_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_pknorm_i16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] +v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_pknorm_i16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] +v_lshl_or_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_pknorm_i16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] +v_lshl_or_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_pknorm_i16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_lshl_or_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01] -v_cvt_pknorm_i16_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] +v_lshl_or_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_cvt_pknorm_i16_f16 v5, m0, 0.5 -// GFX11: encoding: 
[0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] +v_lshl_or_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03] -v_cvt_pknorm_i16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] +v_lshl_or_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03] -v_cvt_pknorm_i16_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] +v_lshl_or_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03] -v_cvt_pknorm_i16_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] +v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_cvt_pknorm_i16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] +v_lshlrev_b16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pknorm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] +v_lshlrev_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00] -v_cvt_pknorm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] +v_lshlrev_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00] -v_cvt_pknorm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_lshlrev_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] +v_lshlrev_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] +v_lshlrev_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: 
[0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] +v_lshlrev_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_pknorm_i16_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] +v_lshlrev_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] +v_lshlrev_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_lshlrev_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] +v_lshlrev_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] +v_lshlrev_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] +v_lshlrev_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] +v_lshlrev_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] +v_lshlrev_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, -1, exec_hi -// GFX11: encoding: 
[0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] +v_lshlrev_b64 v[5:6], v1, vcc +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] +v_lshlrev_b64 v[5:6], v255, exec +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00] -v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] +v_lshlrev_b64 v[5:6], exec_lo, v[2:3] +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00] -v_cvt_pknorm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +v_lshlrev_b64 v[5:6], exec_hi, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00] -v_cvt_pknorm_u16_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_lshlrev_b64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00] -v_cvt_pknorm_u16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +v_lshlrev_b64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00] -v_cvt_pknorm_u16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] +v_lshlrev_b64 v[5:6], 0.5, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pknorm_u16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] +v_lshlrev_b64 v[5:6], src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00] -v_cvt_pknorm_u16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] +v_lshlrev_b64 v[254:255], 0xaf123456, 0.5 +// GFX11: encoding: [0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pknorm_u16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_lshrrev_b16 v5, v1, v2 +// GFX11: encoding: 
[0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pknorm_u16_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] +v_lshrrev_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00] -v_cvt_pknorm_u16_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] +v_lshrrev_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00] -v_cvt_pknorm_u16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] +v_lshrrev_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_pknorm_u16_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] +v_lshrrev_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_pknorm_u16_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] +v_lshrrev_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_pknorm_u16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] +v_lshrrev_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_pknorm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] +v_lshrrev_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_cvt_pknorm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] +v_lshrrev_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_pknorm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_lshrrev_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, v1, v2 -// GFX11: encoding: 
[0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] +v_lshrrev_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] +v_lshrrev_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] +v_lshrrev_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] +v_lshrrev_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] +v_lshrrev_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_lshrrev_b64 v[5:6], v1, vcc +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] +v_lshrrev_b64 v[5:6], v255, exec +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00] -v_cvt_pknorm_u16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] +v_lshrrev_b64 v[5:6], exec_lo, v[2:3] +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00] -v_cvt_pknorm_u16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] +v_lshrrev_b64 v[5:6], exec_hi, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00] -v_cvt_pknorm_u16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] +v_lshrrev_b64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00] 
-v_cvt_pknorm_u16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] +v_lshrrev_b64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00] -v_cvt_pknorm_u16_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] +v_lshrrev_b64 v[5:6], 0.5, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pknorm_u16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] +v_lshrrev_b64 v[5:6], src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00] -v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] +v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 +// GFX11: encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +v_mad_i16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_rpi_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] +v_mad_i16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_rpi_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] +v_mad_i16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_rpi_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] +v_mad_i16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_rpi_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] +v_mad_i16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_rpi_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] +v_mad_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: 
[0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_cvt_rpi_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] +v_mad_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_rpi_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] +v_mad_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_cvt_rpi_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] +v_mad_i16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_rpi_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] +v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01] -v_cvt_rpi_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] +v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_cvt_rpi_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] +v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] -v_cvt_rpi_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] +v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_cvt_rpi_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] +v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] -v_cvt_rpi_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] +v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] 
-v_cvt_rpi_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +v_mad_i32_i16 v5, v1, v2, v3 +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] -v_cvt_u16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] +v_mad_i32_i16 v5, v255, v255, s3 +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00] -v_cvt_u16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] +v_mad_i32_i16 v5, s1, s2, v255 +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07] -v_cvt_u16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] +v_mad_i32_i16 v5, s105, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01] -v_cvt_u16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] +v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01] -v_cvt_u16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] +v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -v_cvt_u16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] +v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_u16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] +v_mad_i32_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_cvt_u16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] +v_mad_i32_i16 v5, exec_lo, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01] -v_cvt_u16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] +v_mad_i32_i16 v5, exec_hi, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01] 
-v_cvt_u16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] +v_mad_i32_i16 v5, null, exec_lo, null +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01] -v_cvt_u16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] +v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_cvt_u16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] +v_mad_i32_i16 v5, 0.5, m0, -1 op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xfa,0x04,0x03,0x00,0x38,0x00,0x00] -v_cvt_u16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] +v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03] -v_cvt_u16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] +v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp +// GFX11: encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] -v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp -// GFX11: encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +v_mad_i32_i24 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_u32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] +v_mad_i32_i24 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_u32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] +v_mad_i32_i24 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_u32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] +v_mad_i32_i24 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_u32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] +v_mad_i32_i24 v5, vcc_lo, ttmp15, 
v3 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_u32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] +v_mad_i32_i24 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_u32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] +v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_u32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] +v_mad_i32_i24 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_u32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] +v_mad_i32_i24 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_u32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] +v_mad_i32_i24 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01] -v_cvt_u32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] +v_mad_i32_i24 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_cvt_u32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] +v_mad_i32_i24 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03] -v_cvt_u32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] +v_mad_i32_i24 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03] -v_cvt_u32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] +v_mad_i32_i24 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03] -v_cvt_u32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] +v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp 
+// GFX11: encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp -// GFX11: encoding: [0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +v_mad_i64_i32 v[5:6], s6, s105, s105, s[6:7] +// W32: encoding: [0x05,0x06,0xff,0xd6,0x69,0xd2,0x18,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] +v_mad_i64_i32 v[5:6], s6, ttmp15, ttmp15, s[104:105] +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7b,0xf6,0xa0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] +v_mad_i64_i32 v[5:6], s6, m0, 0.5, ttmp[14:15] +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7d,0xe0,0xe9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], s6, exec_lo, -1, exec +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7e,0x82,0xf9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, s[104:105] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], s6, exec_hi, null, vcc +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7f,0xf8,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], s105, null, exec_lo, null +// W32: encoding: [0x05,0x69,0xff,0xd6,0x7c,0xfc,0xf0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], vcc_lo, -1, exec_hi, -1 +// W32: encoding: 
[0x05,0x6a,0xff,0xd6,0xc1,0xfe,0x04,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, exec -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456 +// W32: encoding: [0x05,0x6b,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc +// W32: encoding: [0x05,0x7b,0xff,0xd6,0xfd,0xd4,0xf4,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105] +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xf6,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v5, -|src_scc| -// GFX11: encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] +v_mad_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_f64_e64 v255, 0xaf123456 clamp -// GFX11: encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +v_mad_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_u16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +v_mad_i64_i32 v[5:6], s[12:13], 
exec_hi, null, vcc +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_u16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +v_mad_i64_i32 v[5:6], s[12:13], null, exec_lo, null +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_u16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], s[104:105], -1, exec_hi, -1 +// W64: encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_u16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456 +// W64: encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_u16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] +v_mad_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc +// W64: encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_cvt_u32_u16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] +v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp +// GFX11: encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] -v_cvt_u32_u16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] +v_mad_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_u32_u16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] +v_mad_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_u32_u16_e64 v5, exec_lo -// GFX11: encoding: 
[0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] +v_mad_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_u32_u16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] +v_mad_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_u32_u16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] +v_mad_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_u32_u16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] +v_mad_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_cvt_u32_u16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] +v_mad_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_u32_u16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] +v_mad_u16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_cvt_u32_u16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_mad_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] -v_div_fixup_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01] -v_div_fixup_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: 
[0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] -v_div_fixup_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] -v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +v_mad_u32_u16 v5, v1, v2, v3 +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] -v_div_fixup_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +v_mad_u32_u16 v5, v255, v255, s3 +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00] -v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +v_mad_u32_u16 v5, s1, s2, v255 +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07] -v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +v_mad_u32_u16 v5, s105, s105, s105 +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01] -v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01] -v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +v_mad_u32_u16 v5, vcc_hi, 
0xfe0b, vcc_hi +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01] -v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +v_mad_u32_u16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_mad_u32_u16 v5, exec_lo, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01] -v_div_fixup_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] +v_mad_u32_u16 v5, exec_hi, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01] -v_div_fixup_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] +v_mad_u32_u16 v5, null, exec_lo, null +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01] -v_div_fixup_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] +v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_div_fixup_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] +v_mad_u32_u16 v5, 0.5, m0, -1 op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xff,0xfa,0x04,0x03,0x00,0x38,0x00,0x00] -v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] - -v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: 
[0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] - -v_div_fixup_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] - -v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] - -v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] - -v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] - -v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] - -v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] - -v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| -// GFX11: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] - -v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| -// GFX11: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] - -v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null -// GFX11: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] - -v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| -// GFX11: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] - -v_div_fixup_f64 v[5:6], null, 0.5, vcc -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] - -v_div_fixup_f64 v[5:6], -1, -1, 
0xaf123456 -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 -// GFX11: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] - -v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 -// GFX11: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] - -v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 -// GFX11: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] - -v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo -// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01] - -v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15 -// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01] - -v_div_fmas_f32 v5, -|m0|, -|v255|, v3 -// W32: encoding: [0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64] - -v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo| -// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1] - -v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255| -// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7] - -v_div_fmas_f32 v5, null, exec_hi, -|exec_hi| -// W32: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81] - -v_div_fmas_f32 v5, -1, -|m0|, -|m0| -// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1] - -v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2 -// W32: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b] - -v_div_fmas_f32 v5, vcc_lo, v2, v3 -// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] - -v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi -// W64: encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] - -v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 -// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] - -v_div_fmas_f32 v5, m0, 0.5, v255 -// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] - -v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| -// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] - -v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| -// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] - -v_div_fmas_f32 
v5, null, m0, -|m0| -// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] - -v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| -// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] - -v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 -// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] - -v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_div_fmas_f32 v5, v255, src_scc, src_scc -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] - -v_div_fmas_f32 v5, s105, s105, s105 -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] - -v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] - -v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 -// GFX11: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] - -v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] -// GFX11: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] - -v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] -// GFX11: encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] - -v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| -// GFX11: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] - -v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]| -// GFX11: encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] - -v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null -// GFX11: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] - -v_div_fmas_f64 v[5:6], null, 0.5, -src_scc -// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] - -v_div_fmas_f64 v[5:6], -1, -exec, |exec| -// GFX11: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] - -v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 -// GFX11: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] - -v_div_fmas_f64 
v[5:6], -|src_scc|, -1, 0.5 mul:4 -// GFX11: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] - -v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] - -v_div_scale_f32 v5, vcc_lo, v1, v2, s3 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, v255, s2, s105 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -exec_hi, null, 
-vcc_lo -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 -// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, v1, v2, s3 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, v255, s2, s105 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, s1, v255, exec_hi -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, s105, s105, exec_lo -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 -// W64: encoding: 
[0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, m0, 0.5, m0 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 -// W64: encoding: 
[0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 
v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 -// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction - -v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 -// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_dot2_bf16_bf16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] - -v_dot2_bf16_bf16 v5, v255, v255, s105 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] - -v_dot2_bf16_bf16 v5, s1, s2, v3 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] - -v_dot2_bf16_bf16 v5, s105, s105, m0 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] - -v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] - -v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] - -v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] - -v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo -// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] - -v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| -// GFX11: encoding: 
[0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] - -v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| -// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] - -v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| -// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] - -v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] - -v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] - -v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_dot2_f16_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] - -v_dot2_f16_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] - -v_dot2_f16_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] - -v_dot2_f16_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] - -v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] - -v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] - -v_dot2_f16_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] - -v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] - -v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| -// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: 
[0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] - -v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] - -v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_exp_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] - -v_exp_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] - -v_exp_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] - -v_exp_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] - -v_exp_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] - -v_exp_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] - -v_exp_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] - -v_exp_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] - -v_exp_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] - -v_exp_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] - -v_exp_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] - -v_exp_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] - -v_exp_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] - -v_exp_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] - -v_exp_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_exp_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] - -v_exp_f32_e64 v5, v255 -// GFX11: encoding: 
[0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] - -v_exp_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] - -v_exp_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] - -v_exp_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] - -v_exp_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] - -v_exp_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] - -v_exp_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] - -v_exp_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] - -v_exp_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] - -v_exp_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] - -v_exp_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] - -v_exp_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] - -v_exp_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] - -v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_ffbh_i32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] - -v_ffbh_i32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] - -v_ffbh_i32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] - -v_ffbh_i32_e64 
v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] - -v_ffbh_i32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_ffbh_u32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] - -v_ffbh_u32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] - -v_ffbh_u32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] - -v_ffbh_u32_e64 v255, 0xaf123456 -// GFX11: encoding: 
[0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_ffbl_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] - -v_ffbl_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] - -v_ffbl_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] - -v_ffbl_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_floor_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] - -v_floor_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] - -v_floor_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] - -v_floor_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] - -v_floor_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] - -v_floor_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] - 
-v_floor_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] - -v_floor_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] - -v_floor_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] - -v_floor_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] - -v_floor_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] - -v_floor_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] - -v_floor_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] - -v_floor_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] - -v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_floor_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] - -v_floor_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] - -v_floor_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] - -v_floor_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] - -v_floor_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] - -v_floor_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] - -v_floor_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] - -v_floor_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] - -v_floor_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] - -v_floor_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] - -v_floor_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] - -v_floor_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] - -v_floor_f32_e64 v5, 0.5 
mul:2 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] - -v_floor_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] - -v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_floor_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] - -v_floor_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] - -v_floor_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] - -v_floor_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] - -v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_fma_dx9_zero_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_dx9_zero_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_dx9_zero_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] - -v_fma_dx9_zero_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: 
encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] - -v_fma_dx9_zero_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] - -v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_fma_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] - -v_fma_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] - 
-v_fma_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] - -v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] - -v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_fma_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] - -v_fma_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] - -v_fma_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] - 
-v_fma_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] - -v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] - -v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] - -v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] - -v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| -// GFX11: encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] - -v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| -// GFX11: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] - -v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null -// GFX11: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] - -v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| -// GFX11: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] - -v_fma_f64 v[5:6], null, 0.5, vcc -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] - -v_fma_f64 v[5:6], -1, -1, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 -// GFX11: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] - -v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 -// GFX11: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] - -v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 -// GFX11: encoding: 
[0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] - -v_fma_legacy_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_legacy_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_legacy_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] - -v_fma_legacy_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_legacy_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_legacy_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_fma_legacy_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] - -v_fma_legacy_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_legacy_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_legacy_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_fma_legacy_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_fma_legacy_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_legacy_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_fma_legacy_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] - -v_fma_legacy_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_fract_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] - -v_fract_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] - -v_fract_f16_e64 v5, s1 -// GFX11: 
encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] - -v_fract_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] - -v_fract_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] - -v_fract_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] - -v_fract_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] - -v_fract_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] - -v_fract_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] - -v_fract_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] - -v_fract_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] - -v_fract_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] - -v_fract_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] - -v_fract_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] - -v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_fract_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] - -v_fract_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] - -v_fract_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] - -v_fract_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] - -v_fract_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] - -v_fract_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] - -v_fract_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] - -v_fract_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] - -v_fract_f32_e64 v5, exec_lo -// GFX11: encoding: 
[0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] - -v_fract_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] - -v_fract_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] - -v_fract_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] - -v_fract_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] - -v_fract_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] - -v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_fract_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] - -v_fract_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] - -v_fract_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] - -v_fract_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] - -v_fract_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] - -v_fract_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] - -v_fract_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] - -v_fract_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] - -v_fract_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] - -v_fract_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] - -v_fract_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] - -v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_frexp_exp_i16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, v255 
-// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| -// GFX11: encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] - 
-v_frexp_exp_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] - -v_frexp_exp_i32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, s[104:105] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, exec -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] - -v_frexp_exp_i32_f64_e64 v5, -|src_scc| -// GFX11: encoding: 
[0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] - -v_frexp_exp_i32_f64_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_frexp_mant_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] - -v_frexp_mant_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] - -v_frexp_mant_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] - -v_frexp_mant_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] - -v_frexp_mant_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] - -v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_frexp_mant_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] - -v_frexp_mant_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] - -v_frexp_mant_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, s105 -// GFX11: 
encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] - -v_frexp_mant_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] - -v_frexp_mant_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] - -v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_frexp_mant_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] - 
-v_frexp_mant_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] - -v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] - -v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] - -v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_ldexp_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] - -v_ldexp_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00] - -v_ldexp_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00] - -v_ldexp_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00] - -v_ldexp_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00] - -v_ldexp_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_ldexp_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00] - -v_ldexp_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00] - -v_ldexp_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00] - -v_ldexp_f32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00] - -v_ldexp_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00] - -v_ldexp_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00] - -v_ldexp_f32 v5, 0.5, m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08] - -v_ldexp_f32 v5, src_scc, vcc_lo mul:4 -// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10] - -v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 -// GFX11: encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_ldexp_f64 v[5:6], v[1:2], v2 -// GFX11: encoding: 
[0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] - -v_ldexp_f64 v[5:6], v[1:2], v255 -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00] - -v_ldexp_f64 v[5:6], v[1:2], s2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00] - -v_ldexp_f64 v[5:6], v[1:2], s105 -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00] - -v_ldexp_f64 v[5:6], v[254:255], ttmp15 -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00] - -v_ldexp_f64 v[5:6], s[2:3], vcc_hi -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00] - -v_ldexp_f64 v[5:6], s[104:105], vcc_lo -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00] - -v_ldexp_f64 v[5:6], vcc, m0 -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00] - -v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00] - -v_ldexp_f64 v[5:6], exec, exec_lo -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00] - -v_ldexp_f64 v[5:6], null, null -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00] - -v_ldexp_f64 v[5:6], -1, -1 -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00] - -v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08] - -v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4 -// GFX11: encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30] - -v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] - -v_lerp_u8 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00] - -v_lerp_u8 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01] - -v_lerp_u8 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01] - -v_lerp_u8 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01] - -v_lerp_u8 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04] - -v_lerp_u8 v5, 
vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_lerp_u8 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01] - -v_lerp_u8 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01] - -v_lerp_u8 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01] - -v_lerp_u8 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01] - -v_lerp_u8 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_lerp_u8 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03] - -v_lerp_u8 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03] - -v_lerp_u8 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03] - -v_lerp_u8 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_log_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] - -v_log_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] - -v_log_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] - -v_log_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] - -v_log_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] - -v_log_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] - -v_log_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] - -v_log_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] - -v_log_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] - -v_log_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] - -v_log_f16_e64 v5, null -// GFX11: encoding: 
[0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] - -v_log_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] - -v_log_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] - -v_log_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] - -v_log_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_log_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] - -v_log_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] - -v_log_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] - -v_log_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] - -v_log_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] - -v_log_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] - -v_log_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] - -v_log_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] - -v_log_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] - -v_log_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] - -v_log_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] - -v_log_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] - -v_log_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] - -v_log_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] - -v_log_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_lshl_add_u32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00] - -v_lshl_add_u32 v5, v255, s2, s105 -// GFX11: encoding: 
[0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01] - -v_lshl_add_u32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01] - -v_lshl_add_u32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01] - -v_lshl_add_u32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04] - -v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01] - -v_lshl_add_u32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01] - -v_lshl_add_u32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01] - -v_lshl_add_u32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01] - -v_lshl_add_u32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_lshl_add_u32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03] - -v_lshl_add_u32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03] - -v_lshl_add_u32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03] - -v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_lshl_or_b32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00] - -v_lshl_or_b32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01] - -v_lshl_or_b32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01] - -v_lshl_or_b32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01] - -v_lshl_or_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04] - -v_lshl_or_b32 v5, 
vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01] - -v_lshl_or_b32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01] - -v_lshl_or_b32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01] - -v_lshl_or_b32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01] - -v_lshl_or_b32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_lshl_or_b32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03] - -v_lshl_or_b32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03] - -v_lshl_or_b32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03] - -v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_lshlrev_b16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00] - -v_lshlrev_b16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00] - -v_lshlrev_b16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00] - -v_lshlrev_b16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00] - -v_lshlrev_b16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00] - -v_lshlrev_b16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_lshlrev_b16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00] - -v_lshlrev_b16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] - -v_lshlrev_b16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00] - 
-v_lshlrev_b16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00] - -v_lshlrev_b16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00] - -v_lshlrev_b16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00] - -v_lshlrev_b16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] - -v_lshlrev_b16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00] - -v_lshlrev_b16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_lshlrev_b64 v[5:6], v1, vcc -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] - -v_lshlrev_b64 v[5:6], v255, exec -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00] - -v_lshlrev_b64 v[5:6], exec_lo, v[2:3] -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00] - -v_lshlrev_b64 v[5:6], exec_hi, v[254:255] -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00] - -v_lshlrev_b64 v[5:6], null, null -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00] - -v_lshlrev_b64 v[5:6], -1, -1 -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00] - -v_lshlrev_b64 v[5:6], 0.5, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_lshlrev_b64 v[5:6], src_scc, src_scc -// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00] - -v_lshlrev_b64 v[254:255], 0xaf123456, 0.5 -// GFX11: encoding: [0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_lshrrev_b16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00] - -v_lshrrev_b16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00] - -v_lshrrev_b16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00] - -v_lshrrev_b16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00] - -v_lshrrev_b16 v5, vcc_lo, ttmp15 -// GFX11: 
encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00] - -v_lshrrev_b16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_lshrrev_b16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00] - -v_lshrrev_b16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] - -v_lshrrev_b16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00] - -v_lshrrev_b16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00] - -v_lshrrev_b16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00] - -v_lshrrev_b16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00] - -v_lshrrev_b16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] - -v_lshrrev_b16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00] - -v_lshrrev_b16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_lshrrev_b64 v[5:6], v1, vcc -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] - -v_lshrrev_b64 v[5:6], v255, exec -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00] - -v_lshrrev_b64 v[5:6], exec_lo, v[2:3] -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00] - -v_lshrrev_b64 v[5:6], exec_hi, v[254:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00] - -v_lshrrev_b64 v[5:6], null, null -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00] - -v_lshrrev_b64 v[5:6], -1, -1 -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00] - -v_lshrrev_b64 v[5:6], 0.5, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_lshrrev_b64 v[5:6], src_scc, src_scc -// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00] - -v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 -// GFX11: encoding: 
[0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mad_i16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] - -v_mad_i16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] - -v_mad_i16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] - -v_mad_i16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] - -v_mad_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] - -v_mad_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_mad_i16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] - -v_mad_i16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] - -v_mad_i16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] - -v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01] - -v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] - -v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x53,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] - -v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] - -v_mad_i32_i16 v5, v1, v2, v3 -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] - -v_mad_i32_i16 v5, v255, v255, s3 -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00] - -v_mad_i32_i16 v5, s1, s2, v255 -// GFX11: encoding: 
[0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07] - -v_mad_i32_i16 v5, s105, s105, s105 -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01] - -v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01] - -v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] - -v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01] - -v_mad_i32_i16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] - -v_mad_i32_i16 v5, exec_lo, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01] - -v_mad_i32_i16 v5, exec_hi, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01] - -v_mad_i32_i16 v5, null, exec_lo, null -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01] - -v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_mad_i32_i16 v5, 0.5, m0, -1 op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xfa,0x04,0x03,0x00,0x38,0x00,0x00] - -v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03] - -v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp -// GFX11: encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] - -v_mad_i32_i24 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00] - -v_mad_i32_i24 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01] - -v_mad_i32_i24 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01] - -v_mad_i32_i24 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01] - -v_mad_i32_i24 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04] - -v_mad_i32_i24 v5, vcc_hi, 0xaf123456, 
v255 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01] - -v_mad_i32_i24 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01] - -v_mad_i32_i24 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01] - -v_mad_i32_i24 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01] - -v_mad_i32_i24 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_mad_i32_i24 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03] - -v_mad_i32_i24 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03] - -v_mad_i32_i24 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03] - -v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_mad_i64_i32 v[5:6], s6, s105, s105, s[6:7] -// W32: encoding: [0x05,0x06,0xff,0xd6,0x69,0xd2,0x18,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s6, ttmp15, ttmp15, s[104:105] -// W32: encoding: [0x05,0x06,0xff,0xd6,0x7b,0xf6,0xa0,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s6, m0, 0.5, ttmp[14:15] -// W32: encoding: [0x05,0x06,0xff,0xd6,0x7d,0xe0,0xe9,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s6, exec_lo, -1, exec -// W32: encoding: [0x05,0x06,0xff,0xd6,0x7e,0x82,0xf9,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s6, exec_hi, null, vcc -// W32: encoding: [0x05,0x06,0xff,0xd6,0x7f,0xf8,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction - -v_mad_i64_i32 v[5:6], s105, null, exec_lo, null -// W32: encoding: [0x05,0x69,0xff,0xd6,0x7c,0xfc,0xf0,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], vcc_lo, -1, exec_hi, -1 -// W32: encoding: [0x05,0x6a,0xff,0xd6,0xc1,0xfe,0x04,0x03] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456 -// W32: encoding: [0x05,0x6b,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc -// W32: encoding: [0x05,0x7b,0xff,0xd6,0xfd,0xd4,0xf4,0x03] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105] -// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xf6,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] -// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec -// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s[12:13], exec_hi, null, vcc -// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s[12:13], null, exec_lo, null -// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], s[104:105], 
-1, exec_hi, -1 -// W64: encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456 -// W64: encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc -// W64: encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp -// GFX11: encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] - -v_mad_u16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] - -v_mad_u16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] - -v_mad_u16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] - -v_mad_u16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] - -v_mad_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] - -v_mad_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_mad_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] - -v_mad_u16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] - -v_mad_u16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] - -v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01] - -v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: 
[0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] - -v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x41,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] - -v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] - -v_mad_u32_u16 v5, v1, v2, v3 -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] - -v_mad_u32_u16 v5, v255, v255, s3 -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00] - -v_mad_u32_u16 v5, s1, s2, v255 -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07] - -v_mad_u32_u16 v5, s105, s105, s105 -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01] - -v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01] - -v_mad_u32_u16 v5, vcc_hi, 0xfe0b, vcc_hi -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] - -v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01] - -v_mad_u32_u16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] - -v_mad_u32_u16 v5, exec_lo, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01] - -v_mad_u32_u16 v5, exec_hi, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01] - -v_mad_u32_u16 v5, null, exec_lo, null -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01] - -v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_mad_u32_u16 v5, 0.5, m0, -1 op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xff,0xfa,0x04,0x03,0x00,0x38,0x00,0x00] - -v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03] - 
-v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp -// GFX11: encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] - -v_mad_u32_u24 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00] - -v_mad_u32_u24 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01] - -v_mad_u32_u24 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01] - -v_mad_u32_u24 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01] - -v_mad_u32_u24 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04] - -v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01] - -v_mad_u32_u24 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01] - -v_mad_u32_u24 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01] - -v_mad_u32_u24 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01] - -v_mad_u32_u24 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_mad_u32_u24 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03] - -v_mad_u32_u24 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03] - -v_mad_u32_u24 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03] - -v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_mad_u64_u32 v[5:6], s6, s105, s105, s[6:7] -// W32: encoding: [0x05,0x06,0xfe,0xd6,0x69,0xd2,0x18,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s6, ttmp15, ttmp15, 
s[104:105] -// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7b,0xf6,0xa0,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s6, m0, 0.5, ttmp[14:15] -// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s6, exec_lo, -1, exec -// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7e,0x82,0xf9,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s6, exec_hi, null, vcc -// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s105, null, exec_lo, null -// W32: encoding: [0x05,0x69,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], vcc_lo, -1, exec_hi, -1 -// W32: encoding: [0x05,0x6a,0xfe,0xd6,0xc1,0xfe,0x04,0x03] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456 -// W32: encoding: [0x05,0x6b,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc -// W32: encoding: [0x05,0x7b,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105] -// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xf6,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] -// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] 
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec -// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc -// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s[12:13], null, exec_lo, null -// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1 -// W64: encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456 -// W64: encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc -// W64: encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp -// GFX11: encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] - -v_max3_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] - -v_max3_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01] - -v_max3_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01] - -v_max3_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] - -v_max3_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] - -v_max3_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: 
[0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] - -v_max3_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] - -v_max3_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] - -v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x7d,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] - -v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23] - -v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_max3_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] - -v_max3_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01] - -v_max3_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01] - -v_max3_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01] - -v_max3_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04] - -v_max3_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_max3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1] - -v_max3_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01] - -v_max3_f32 v5, 
|exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01] - -v_max3_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_max3_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_max3_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_max3_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_max3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33] - -v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_max3_i16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] - -v_max3_i16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] - -v_max3_i16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] - -v_max3_i16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] - -v_max3_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] - -v_max3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_max3_i16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] - -v_max3_i16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] - -v_max3_i16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] - -v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] - -v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -v_max3_i16 v5, -1, 
exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] - -v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] - -v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] - -v_max3_i32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] - -v_max3_i32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01] - -v_max3_i32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01] - -v_max3_i32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01] - -v_max3_i32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04] - -v_max3_i32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_max3_i32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01] - -v_max3_i32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01] - -v_max3_i32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01] - -v_max3_i32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01] - -v_max3_i32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_max3_i32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03] - -v_max3_i32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03] - -v_max3_i32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03] - -v_max3_i32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: 
[0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_max3_u16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] - -v_max3_u16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] - -v_max3_u16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] - -v_max3_u16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] - -v_max3_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] - -v_max3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_max3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] - -v_max3_u16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] - -v_max3_u16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] - -v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] - -v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] - -v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] - -v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] - -v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] - -v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] - -v_max3_u32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] - -v_max3_u32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01] - -v_max3_u32 v5, s1, v255, exec_hi -// GFX11: 
encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01] - -v_max3_u32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01] - -v_max3_u32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04] - -v_max3_u32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_max3_u32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01] - -v_max3_u32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01] - -v_max3_u32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01] - -v_max3_u32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01] - -v_max3_u32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_max3_u32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03] - -v_max3_u32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03] - -v_max3_u32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03] - -v_max3_u32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_max_f64 v[5:6], v[1:2], v[2:3] -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] - -v_max_f64 v[5:6], v[254:255], v[254:255] -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00] - -v_max_f64 v[5:6], s[2:3], s[4:5] -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00] - -v_max_f64 v[5:6], s[104:105], s[104:105] -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00] - -v_max_f64 v[5:6], vcc, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00] - -v_max_f64 v[5:6], ttmp[14:15], 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_max_f64 v[5:6], 
-|exec|, src_scc -// GFX11: encoding: [0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20] - -v_max_f64 v[5:6], null, 0.5 -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00] - -v_max_f64 v[5:6], -1, -1 -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00] - -v_max_f64 v[5:6], 0.5, null mul:2 -// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08] - -v_max_f64 v[5:6], -|src_scc|, -|exec| mul:4 -// GFX11: encoding: [0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70] - -v_max_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 -// GFX11: encoding: [0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] - -v_max_i16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] - -v_max_i16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00] - -v_max_i16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00] - -v_max_i16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00] - -v_max_i16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00] - -v_max_i16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_max_i16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00] - -v_max_i16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] - -v_max_i16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00] - -v_max_i16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00] - -v_max_i16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00] - -v_max_i16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00] - -v_max_i16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] - -v_max_i16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00] - -v_max_i16 v255, 0xfe0b, vcc_hi -// 
GFX11: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_max_u16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] - -v_max_u16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00] - -v_max_u16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00] - -v_max_u16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00] - -v_max_u16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00] - -v_max_u16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_max_u16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00] - -v_max_u16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] - -v_max_u16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00] - -v_max_u16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00] - -v_max_u16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00] - -v_max_u16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00] - -v_max_u16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] - -v_max_u16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00] - -v_max_u16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_maxmin_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] - -v_maxmin_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] - -v_maxmin_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] - -v_maxmin_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] - -v_maxmin_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: 
[0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] - -v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] - -v_maxmin_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] - -v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] - -v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| -// GFX11: encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] - -v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] - -v_maxmin_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00] - -v_maxmin_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01] - -v_maxmin_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01] - -v_maxmin_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01] - -v_maxmin_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04] - -v_maxmin_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_maxmin_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1] - -v_maxmin_f32 v5, m0, 0.5, m0 -// GFX11: encoding: 
[0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01] - -v_maxmin_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01] - -v_maxmin_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_maxmin_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_maxmin_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_maxmin_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_maxmin_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33] - -v_maxmin_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_maxmin_i32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00] - -v_maxmin_i32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01] - -v_maxmin_i32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01] - -v_maxmin_i32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01] - -v_maxmin_i32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04] - -v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_maxmin_i32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01] - -v_maxmin_i32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01] - -v_maxmin_i32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01] - -v_maxmin_i32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01] - -v_maxmin_i32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: 
[0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_maxmin_i32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03] - -v_maxmin_i32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03] +v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03] -v_maxmin_i32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03] +v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp +// GFX11: encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] -v_maxmin_i32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_mad_u32_u24 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00] -v_maxmin_u32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00] +v_mad_u32_u24 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01] -v_maxmin_u32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01] +v_mad_u32_u24 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01] -v_maxmin_u32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01] +v_mad_u32_u24 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01] -v_maxmin_u32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01] +v_mad_u32_u24 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04] -v_maxmin_u32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04] +v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] 
+v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01] -v_maxmin_u32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01] +v_mad_u32_u24 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01] -v_maxmin_u32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01] +v_mad_u32_u24 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01] -v_maxmin_u32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01] +v_mad_u32_u24 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01] -v_maxmin_u32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01] +v_mad_u32_u24 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_maxmin_u32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_mad_u32_u24 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03] -v_maxmin_u32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03] +v_mad_u32_u24 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03] -v_maxmin_u32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03] +v_mad_u32_u24 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03] -v_maxmin_u32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03] +v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_maxmin_u32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_mad_u64_u32 v[5:6], s6, s105, s105, s[6:7] +// W32: encoding: 
[0x05,0x06,0xfe,0xd6,0x69,0xd2,0x18,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] +v_mad_u64_u32 v[5:6], s6, ttmp15, ttmp15, s[104:105] +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7b,0xf6,0xa0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00] +v_mad_u64_u32 v[5:6], s6, m0, 0.5, ttmp[14:15] +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00] +v_mad_u64_u32 v[5:6], s6, exec_lo, -1, exec +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7e,0x82,0xf9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00] +v_mad_u64_u32 v[5:6], s6, exec_hi, null, vcc +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00] +v_mad_u64_u32 v[5:6], s105, null, exec_lo, null +// W32: encoding: [0x05,0x69,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_mad_u64_u32 v[5:6], vcc_lo, -1, exec_hi, -1 +// W32: encoding: [0x05,0x6a,0xfe,0xd6,0xc1,0xfe,0x04,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00] +v_mad_u64_u32 v[5:6], vcc_hi, 0.5, m0, 
0xaf123456 +// W32: encoding: [0x05,0x6b,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00] +v_mad_u64_u32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc +// W32: encoding: [0x05,0x7b,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00] +v_mad_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00] +v_mad_u64_u32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105] +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xf6,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00] +v_mad_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00] +v_mad_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00] +v_mad_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo -// GFX11: encoding: 
[0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00] +v_mad_u64_u32 v[5:6], s[12:13], null, exec_lo, null +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_mad_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1 +// W64: encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_lo_u32_b32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] +v_mad_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456 +// W64: encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_lo_u32_b32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00] +v_mad_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc +// W64: encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_mbcnt_lo_u32_b32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00] +v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp +// GFX11: encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] -v_mbcnt_lo_u32_b32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00] +v_max3_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] -v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00] +v_max3_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01] -v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_max3_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01] 
-v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00] +v_max3_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] -v_mbcnt_lo_u32_b32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00] +v_max3_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] -v_mbcnt_lo_u32_b32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00] +v_max3_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mbcnt_lo_u32_b32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00] +v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] -v_mbcnt_lo_u32_b32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00] +v_max3_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] -v_mbcnt_lo_u32_b32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00] +v_max3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] -v_mbcnt_lo_u32_b32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00] +v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] -v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00] +v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] -v_med3_f16 v5, v1, v2, s3 -// GFX11: encoding: 
[0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] +v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] -v_med3_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] +v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23] -v_med3_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] +v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_med3_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] -v_med3_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01] -v_med3_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01] -v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] +v_max3_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] +v_max3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x7d,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] +v_max3_f32 v5, -|ttmp15|, -|src_scc|, 
-|ttmp15| +// GFX11: encoding: [0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1] -v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_max3_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] +v_max3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01] -v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] +v_max3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1] -v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] +v_max3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_max3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3] -v_med3_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] +v_max3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b] -v_med3_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01] +v_max3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33] -v_med3_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01] +v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_med3_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_i16 v5, 
v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] -v_med3_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_i16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] -v_med3_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_max3_i16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] -v_med3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1] +v_max3_i16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_i16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01] +v_max3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1] +v_max3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +v_max3_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_med3_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3] +v_max3_i16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] -v_med3_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b] +v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_f32 v5, 
-src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33] +v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_i16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4d,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_med3_i16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_i16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_med3_i16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] -v_med3_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01] -v_med3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01] -v_med3_i16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +v_max3_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_i16 v5, m0, 0.5, m0 -// GFX11: 
encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +v_max3_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_i16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +v_max3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +v_max3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_max3_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +v_max3_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01] -v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +v_max3_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +v_max3_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_max3_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_i32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] +v_max3_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03] -v_med3_i32 v5, v255, s2, s105 -// GFX11: 
encoding: [0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01] +v_max3_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_i32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01] +v_max3_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_med3_i32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] -v_med3_i32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] -v_med3_i32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_max3_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] -v_med3_i32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01] +v_max3_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_i32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01] +v_max3_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_i32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01] +v_max3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_med3_i32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01] +v_max3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_i32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_max3_u16 v5, m0, 0.5, m0 +// GFX11: encoding: 
[0x05,0x00,0x4e,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_med3_i32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03] +v_max3_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] -v_med3_i32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03] +v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_i32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03] +v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_med3_i32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_u16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4e,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_med3_u16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_u16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_med3_u16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +v_max3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] -v_med3_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +v_max3_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01] 
-v_med3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01] -v_med3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +v_max3_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01] -v_med3_u16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +v_max3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04] -v_med3_u16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +v_max3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +v_max3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01] -v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_max3_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01] -v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +v_max3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01] -v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +v_max3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01] -v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +v_max3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_med3_u16 
v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_max3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03] -v_med3_u32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] +v_max3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03] -v_med3_u32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01] +v_max3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03] -v_med3_u32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01] +v_max3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_med3_u32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01] +v_max_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] -v_med3_u32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04] +v_max_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00] -v_med3_u32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_max_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00] -v_med3_u32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01] +v_max_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00] -v_med3_u32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01] +v_max_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00] -v_med3_u32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01] +v_max_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: 
encoding: [0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_med3_u32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01] +v_max_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20] -v_med3_u32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_max_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00] -v_med3_u32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03] +v_max_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00] -v_med3_u32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03] +v_max_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08] -v_med3_u32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03] +v_max_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70] -v_med3_u32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_max_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] -v_min3_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] +v_max_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] -v_min3_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] +v_max_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00] -v_min3_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] +v_max_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00] -v_min3_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] +v_max_i16 v5, s105, 
s105 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00] -v_min3_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] +v_max_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00] -v_min3_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_max_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] +v_max_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00] -v_min3_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] +v_max_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_min3_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] +v_max_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00] -v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x7d,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] +v_max_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00] -v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_max_i16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00] -v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] +v_max_i16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00] -v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] +v_max_i16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_min3_f16 v5, -src_scc, |vcc_lo|, 
-1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] +v_max_i16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00] -v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_max_i16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min3_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] +v_max_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] -v_min3_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01] +v_max_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00] -v_min3_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01] +v_max_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00] -v_min3_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01] +v_max_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00] -v_min3_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04] +v_max_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00] -v_min3_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_max_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_min3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1] +v_max_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00] -v_min3_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01] +v_max_u16 v5, m0, 0.5 +// GFX11: encoding: 
[0x05,0x00,0x09,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_min3_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01] +v_max_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00] -v_min3_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1] +v_max_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00] -v_min3_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +v_max_u16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00] -v_min3_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3] +v_max_u16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00] -v_min3_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b] +v_max_u16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_min3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33] +v_max_u16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00] -v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +v_max_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +v_maxmin_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] -v_min3_i16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +v_maxmin_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] -v_min3_i16 v5, s1, v255, exec_hi -// GFX11: encoding: 
[0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +v_maxmin_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] -v_min3_i16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +v_maxmin_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] -v_min3_i16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +v_maxmin_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_i16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] -v_min3_i16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +v_maxmin_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_i16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] -v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] -v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| +// GFX11: encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: 
[0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] -v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] -v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] -v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] -v_min3_i32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] +v_maxmin_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00] -v_min3_i32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01] +v_maxmin_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01] -v_min3_i32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01] +v_maxmin_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01] -v_min3_i32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01] +v_maxmin_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01] -v_min3_i32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04] +v_maxmin_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_i32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_maxmin_f32 v5, 
vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_min3_i32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01] +v_maxmin_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1] -v_min3_i32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01] +v_maxmin_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_i32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01] +v_maxmin_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01] -v_min3_i32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01] +v_maxmin_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1] -v_min3_i32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_maxmin_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_min3_i32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03] +v_maxmin_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3] -v_min3_i32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03] +v_maxmin_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b] -v_min3_i32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03] +v_maxmin_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33] -v_min3_i32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_maxmin_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: 
[0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_min3_u16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +v_maxmin_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00] -v_min3_u16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +v_maxmin_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01] -v_min3_u16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +v_maxmin_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01] -v_min3_u16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +v_maxmin_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01] -v_min3_u16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +v_maxmin_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_u16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_min3_u16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +v_maxmin_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01] -v_min3_u16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] +v_maxmin_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_u16 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +v_maxmin_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01] -v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] -// GFX11: encoding: 
[0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +v_maxmin_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01] -v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +v_maxmin_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +v_maxmin_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03] -v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] +v_maxmin_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03] -v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +v_maxmin_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03] -v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +v_maxmin_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_min3_u32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] +v_maxmin_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00] -v_min3_u32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01] +v_maxmin_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01] -v_min3_u32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01] +v_maxmin_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01] -v_min3_u32 v5, s105, s105, exec_lo -// GFX11: encoding: 
[0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01] +v_maxmin_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01] -v_min3_u32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04] +v_maxmin_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04] -v_min3_u32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_min3_u32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01] +v_maxmin_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01] -v_min3_u32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01] +v_maxmin_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01] -v_min3_u32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01] +v_maxmin_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01] -v_min3_u32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01] +v_maxmin_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01] -v_min3_u32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_maxmin_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_min3_u32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03] +v_maxmin_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03] -v_min3_u32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03] +v_maxmin_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: 
[0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03] -v_min3_u32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03] +v_maxmin_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03] -v_min3_u32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_maxmin_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_min_f64 v[5:6], v[1:2], v[2:3] -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] +v_mbcnt_hi_u32_b32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] -v_min_f64 v[5:6], v[254:255], v[254:255] -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00] +v_mbcnt_hi_u32_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00] -v_min_f64 v[5:6], s[2:3], s[4:5] -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00] +v_mbcnt_hi_u32_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00] -v_min_f64 v[5:6], s[104:105], s[104:105] -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00] +v_mbcnt_hi_u32_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00] -v_min_f64 v[5:6], vcc, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00] +v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00] -v_min_f64 v[5:6], ttmp[14:15], 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_min_f64 v[5:6], -|exec|, src_scc -// GFX11: encoding: [0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20] +v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00] -v_min_f64 v[5:6], null, 0.5 -// GFX11: encoding: 
[0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00] +v_mbcnt_hi_u32_b32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00] -v_min_f64 v[5:6], -1, -1 -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00] +v_mbcnt_hi_u32_b32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00] -v_min_f64 v[5:6], 0.5, null mul:2 -// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08] +v_mbcnt_hi_u32_b32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00] -v_min_f64 v[5:6], -|src_scc|, -|exec| mul:4 -// GFX11: encoding: [0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70] +v_mbcnt_hi_u32_b32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00] -v_min_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 -// GFX11: encoding: [0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] +v_mbcnt_hi_u32_b32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00] -v_min_i16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] +v_mbcnt_hi_u32_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00] -v_min_i16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00] +v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00] -v_min_i16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00] +v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_min_i16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] -v_min_i16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00] -v_min_i16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: 
[0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00] -v_min_i16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00] +v_mbcnt_lo_u32_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00] -v_min_i16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00] -v_min_i16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00] +v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_min_i16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00] -v_min_i16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00] -v_min_i16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00] -v_min_i16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00] -v_min_i16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00] -v_min_i16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_mbcnt_lo_u32_b32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00] -v_min_u16 v5, v1, v2 -// GFX11: 
encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] +v_mbcnt_lo_u32_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00] -v_min_u16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00] +v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00] -v_min_u16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00] +v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_min_u16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00] +v_med3_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] -v_min_u16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00] +v_med3_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] -v_min_u16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_med3_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] -v_min_u16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00] +v_med3_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] -v_min_u16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +v_med3_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] -v_min_u16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00] +v_med3_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_min_u16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00] +v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] -v_min_u16 v5, null, exec_lo -// GFX11: encoding: 
[0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00] +v_med3_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] -v_min_u16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00] +v_med3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] -v_min_u16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] -v_min_u16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00] +v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_min_u16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] -v_minmax_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] -v_minmax_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] +v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] -v_minmax_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] +v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_minmax_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] -v_minmax_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_f32 
v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01] -v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_med3_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01] -v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] +v_med3_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01] -v_minmax_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04] -v_minmax_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] +v_med3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] +v_med3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1] -v_minmax_f16 v5, null, exec_lo, -|0xfe0b| -// GFX11: encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_med3_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01] -v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] +v_med3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01] -v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] +v_med3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1] -v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] +v_med3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: 
[0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +v_med3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3] -v_minmax_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] +v_med3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b] -v_minmax_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01] +v_med3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33] -v_minmax_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01] +v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_minmax_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_i16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] -v_minmax_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_i16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] -v_minmax_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_med3_i16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] -v_minmax_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1] +v_med3_i16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] -v_minmax_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_i16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] -v_minmax_f32 v5, |exec_lo|, -1, vcc_hi 
-// GFX11: encoding: [0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01] +v_med3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_minmax_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1] +v_med3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] -v_minmax_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +v_med3_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_minmax_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3] +v_med3_i16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] -v_minmax_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b] +v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] -v_minmax_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33] +v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_minmax_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] -v_minmax_i32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00] +v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x50,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_minmax_i32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01] +v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: 
[0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] -v_minmax_i32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01] +v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_minmax_i32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] -v_minmax_i32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01] -v_minmax_i32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_med3_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01] -v_minmax_i32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01] +v_med3_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01] -v_minmax_i32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04] -v_minmax_i32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01] +v_med3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_minmax_i32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01] +v_med3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01] -v_minmax_i32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_med3_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01] -v_minmax_i32 v5, -1, exec_hi, src_scc -// GFX11: encoding: 
[0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03] +v_med3_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01] -v_minmax_i32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03] +v_med3_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01] -v_minmax_i32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03] +v_med3_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_minmax_i32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_med3_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03] -v_minmax_u32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00] +v_med3_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03] -v_minmax_u32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01] +v_med3_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03] -v_minmax_u32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01] +v_med3_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_minmax_u32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01] +v_med3_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] -v_minmax_u32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04] +v_med3_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] -v_minmax_u32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_med3_u16 v5, s1, v255, exec_hi +// GFX11: encoding: 
[0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] -v_minmax_u32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01] +v_med3_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] -v_minmax_u32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01] +v_med3_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] -v_minmax_u32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01] +v_med3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_minmax_u32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01] +v_med3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] -v_minmax_u32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_med3_u16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_minmax_u32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03] +v_med3_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] -v_minmax_u32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03] +v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] -v_minmax_u32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03] +v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_minmax_u32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] 
-v_mov_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] +v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x51,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_mov_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] +v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] -v_mov_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] +v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_mov_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] +v_med3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] -v_mov_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] +v_med3_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01] -v_mov_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] +v_med3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01] -v_mov_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] +v_med3_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01] -v_mov_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] +v_med3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04] -v_mov_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] +v_med3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_mov_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] +v_med3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01] -v_mov_b32_e64 v5, null -// GFX11: 
encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] +v_med3_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01] -v_mov_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] +v_med3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01] -v_mov_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] +v_med3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01] -v_mov_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] +v_med3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_mov_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +v_med3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03] -v_movreld_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] +v_med3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03] -v_movreld_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] +v_med3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03] -v_movreld_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] +v_med3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_movreld_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] +v_min3_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] -v_movreld_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] +v_min3_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] -v_movreld_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] 
+v_min3_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] -v_movreld_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] +v_min3_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] -v_movreld_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] +v_min3_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] -v_movreld_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] +v_min3_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_movreld_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] +v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] -v_movreld_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] +v_min3_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] -v_movreld_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] +v_min3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] -v_movreld_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] +v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] -v_movreld_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] +v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_movreld_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] -v_movrels_b32_e64 v5, v1 -// GFX11: 
encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] +v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] -v_movrels_b32_e64 v255, v255 -// GFX11: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] +v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] -v_movrelsd_2_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] +v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_movrelsd_2_b32_e64 v255, v255 -// GFX11: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] +v_min3_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] -v_movrelsd_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] +v_min3_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01] -v_movrelsd_b32_e64 v255, v255 -// GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] +v_min3_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01] -v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] +v_min3_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01] -v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01] +v_min3_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04] -v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01] +v_min3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01] +v_min3_f32 v5, -|ttmp15|, -|src_scc|, 
-|ttmp15| +// GFX11: encoding: [0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1] -v_mqsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00] +v_min3_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01] -v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04] +v_min3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01] -v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01] +v_min3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1] -v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07] +v_min3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01] +v_min3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3] -v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01] +v_min3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b] -v_mqsad_pk_u16_u8 v[5:6], null, null, vcc -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01] +v_min3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33] -v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03] +v_min3_i16 
v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] -v_mqsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 -// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03] +v_min3_i16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] -v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp -// GFX11: encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] +v_min3_i16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] -v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07] +v_min3_i16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] -v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07] +v_min3_i16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] -v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07] +v_min3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07] +v_min3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] -v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07] +v_min3_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07] +v_min3_i16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] -v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07] +v_min3_i16 v5, exec_hi, null, vcc_lo 
op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] -v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07] +v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07] +v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] -v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07] +v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4a,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_mqsad_u32_u8 v[5:8], null, null, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07] +v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] -v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07] +v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07] +v_min3_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] -v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255] -// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07] +v_min3_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01] -v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp -// GFX11: encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] +v_min3_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01] -v_msad_u8 v5, v1, v2, s3 -// GFX11: encoding: 
[0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] +v_min3_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01] -v_msad_u8 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01] +v_min3_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04] -v_msad_u8 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01] +v_min3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_msad_u8 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01] +v_min3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01] -v_msad_u8 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04] +v_min3_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01] -v_msad_u8 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_min3_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01] -v_msad_u8 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01] +v_min3_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01] -v_msad_u8 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01] +v_min3_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_msad_u8 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01] +v_min3_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03] -v_msad_u8 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01] +v_min3_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03] -v_msad_u8 v5, null, 
exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_min3_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03] -v_msad_u8 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03] +v_min3_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_msad_u8 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03] +v_min3_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] -v_msad_u8 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03] +v_min3_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] -v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_min3_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] -v_mul_f64 v[5:6], v[1:2], v[2:3] -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] +v_min3_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] -v_mul_f64 v[5:6], v[254:255], v[254:255] -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00] +v_min3_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] -v_mul_f64 v[5:6], s[2:3], s[4:5] -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00] +v_min3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mul_f64 v[5:6], s[104:105], s[104:105] -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00] +v_min3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] -v_mul_f64 v[5:6], vcc, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00] +v_min3_u16 v5, m0, 0.5, m0 +// GFX11: encoding: 
[0x05,0x00,0x4b,0xd6,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00] -v_mul_f64 v[5:6], ttmp[14:15], 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_min3_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] -v_mul_f64 v[5:6], -|exec|, src_scc -// GFX11: encoding: [0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20] +v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] -v_mul_f64 v[5:6], null, 0.5 -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00] +v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -v_mul_f64 v[5:6], -1, -1 -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00] +v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] -v_mul_f64 v[5:6], 0.5, null mul:2 -// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08] +v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4b,0xd6,0xff,0xfa,0xfc,0x03,0x00,0x38,0x00,0x00] -v_mul_f64 v[5:6], -|src_scc|, -|exec| mul:4 -// GFX11: encoding: [0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70] +v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] -v_mul_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 -// GFX11: encoding: [0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] +v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -v_mul_hi_i32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] +v_min3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] -v_mul_hi_i32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00] +v_min3_u32 v5, v255, s2, s105 +// GFX11: encoding: 
[0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01] -v_mul_hi_i32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00] +v_min3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01] -v_mul_hi_i32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00] +v_min3_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01] -v_mul_hi_i32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00] +v_min3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04] -v_mul_hi_i32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_min3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_mul_hi_i32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00] +v_min3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01] -v_mul_hi_i32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00] +v_min3_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01] -v_mul_hi_i32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00] +v_min3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01] -v_mul_hi_i32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00] +v_min3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01] -v_mul_hi_i32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00] +v_min3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_mul_hi_i32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00] +v_min3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: 
[0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03] -v_mul_hi_i32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00] +v_min3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03] -v_mul_hi_i32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00] +v_min3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03] -v_mul_hi_i32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_min3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_mul_hi_u32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] +v_min_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] -v_mul_hi_u32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00] +v_min_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00] -v_mul_hi_u32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00] +v_min_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00] -v_mul_hi_u32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00] +v_min_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00] -v_mul_hi_u32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00] +v_min_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00] -v_mul_hi_u32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_min_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_mul_hi_u32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00] +v_min_f64 v[5:6], -|exec|, 
src_scc +// GFX11: encoding: [0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20] -v_mul_hi_u32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00] +v_min_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00] -v_mul_hi_u32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00] +v_min_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00] -v_mul_hi_u32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00] +v_min_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08] -v_mul_hi_u32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00] +v_min_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70] -v_mul_hi_u32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00] +v_min_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] -v_mul_hi_u32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00] +v_min_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] -v_mul_hi_u32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00] +v_min_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00] -v_mul_hi_u32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_min_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00] -v_mul_lo_u16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] +v_min_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00] -v_mul_lo_u16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00] +v_min_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00] 
-v_mul_lo_u16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00] +v_min_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_mul_lo_u16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00] +v_min_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00] -v_mul_lo_u16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00] +v_min_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_mul_lo_u16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_min_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00] -v_mul_lo_u16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00] +v_min_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00] -v_mul_lo_u16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +v_min_i16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00] -v_mul_lo_u16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00] +v_min_i16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00] -v_mul_lo_u16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00] +v_min_i16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_mul_lo_u16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00] +v_min_i16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00] -v_mul_lo_u16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00] +v_min_i16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_mul_lo_u16 v5, 0.5, m0 -// 
GFX11: encoding: [0x05,0x00,0x05,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +v_min_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] -v_mul_lo_u16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00] +v_min_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00] -v_mul_lo_u16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_min_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00] -v_mul_lo_u32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] +v_min_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00] -v_mul_lo_u32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00] +v_min_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00] -v_mul_lo_u32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00] +v_min_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_mul_lo_u32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00] +v_min_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00] -v_mul_lo_u32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00] +v_min_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_mul_lo_u32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_min_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00] -v_mul_lo_u32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00] +v_min_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00] -v_mul_lo_u32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00] 
+v_min_u16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00] -v_mul_lo_u32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00] +v_min_u16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00] -v_mul_lo_u32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00] +v_min_u16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_mul_lo_u32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00] +v_min_u16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00] -v_mul_lo_u32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00] +v_min_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_mul_lo_u32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00] +v_minmax_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] -v_mul_lo_u32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00] +v_minmax_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] -v_mul_lo_u32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_minmax_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] -v_mullit_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] +v_minmax_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] -v_mullit_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01] +v_minmax_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] -v_mullit_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] +v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 +// 
GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_mullit_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] +v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] -v_mullit_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] +v_minmax_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] -v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_minmax_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] -v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] +v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] -v_mullit_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] +v_minmax_f16 v5, null, exec_lo, -|0xfe0b| +// GFX11: encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_mullit_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] +v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] -v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] +v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] -v_mullit_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] -v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] +v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: 
encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] -v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] +v_minmax_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] -v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] +v_minmax_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01] -v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +v_minmax_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01] -v_nop_e64 -// GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] +v_minmax_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01] -v_not_b16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] +v_minmax_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04] -v_not_b16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] +v_minmax_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_not_b16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] +v_minmax_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1] -v_not_b16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] +v_minmax_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01] -v_not_b16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] +v_minmax_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01] -v_not_b16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] +v_minmax_f32 v5, -|exec_hi|, null, -|vcc_lo| +// 
GFX11: encoding: [0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1] -v_not_b16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] +v_minmax_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_not_b16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] +v_minmax_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3] -v_not_b16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] +v_minmax_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b] -v_not_b16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] +v_minmax_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33] -v_not_b16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] +v_minmax_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_not_b16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] +v_minmax_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00] -v_not_b16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] +v_minmax_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01] -v_not_b16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] +v_minmax_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01] -v_not_b16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_minmax_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01] -v_not_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] +v_minmax_i32 v5, vcc_lo, ttmp15, v3 
+// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04] -v_not_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] +v_minmax_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_not_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] +v_minmax_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01] -v_not_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] +v_minmax_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01] -v_not_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] +v_minmax_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01] -v_not_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] +v_minmax_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01] -v_not_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] +v_minmax_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_not_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] +v_minmax_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03] -v_not_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] +v_minmax_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03] -v_not_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] +v_minmax_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03] -v_not_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] +v_minmax_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: 
[0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_not_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] +v_minmax_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00] -v_not_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] +v_minmax_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01] -v_not_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] +v_minmax_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01] -v_not_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +v_minmax_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01] -v_or3_b32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] +v_minmax_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04] -v_or3_b32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01] +v_minmax_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_or3_b32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01] +v_minmax_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01] -v_or3_b32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01] +v_minmax_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01] -v_or3_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04] +v_minmax_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01] -v_or3_b32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_minmax_u32 v5, exec_hi, null, vcc_lo +// GFX11: 
encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01] -v_or3_b32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01] +v_minmax_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_or3_b32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01] +v_minmax_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03] -v_or3_b32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01] +v_minmax_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03] -v_or3_b32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01] +v_minmax_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03] -v_or3_b32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_minmax_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_or3_b32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03] +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] -v_or3_b32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03] +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01] -v_or3_b32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03] +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01] -v_or3_b32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01] 
-v_or_b16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +v_mqsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00] -v_or_b16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04] -v_or_b16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01] -v_or_b16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07] -v_or_b16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01] -v_or_b16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01] -v_or_b16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +v_mqsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01] -v_or_b16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_or_b16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03] -v_or_b16 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +v_mqsad_pk_u16_u8 v[5:6], src_scc, 
src_scc, 0.5 +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03] -v_or_b16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX11: encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] -v_or_b16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07] -v_or_b16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07] -v_or_b16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07] -v_or_b16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07] -v_pack_b32_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] +v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07] -v_pack_b32_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00] +v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07] -v_pack_b32_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00] +v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07] -v_pack_b32_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00] +v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07] -v_pack_b32_f16 
v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00] +v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07] -v_pack_b32_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07] -v_pack_b32_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00] +v_mqsad_u32_u8 v[5:8], null, null, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07] -v_pack_b32_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00] +v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07] -v_pack_b32_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00] +v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07] -v_pack_b32_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00] +v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07] -v_pack_b32_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00] +v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp +// GFX11: encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] -v_pack_b32_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00] +v_msad_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] -v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40] +v_msad_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01] -v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: 
[0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20] +v_msad_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01] -v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_msad_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01] -v_perm_b32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] +v_msad_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04] -v_perm_b32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01] +v_msad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_perm_b32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01] +v_msad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01] -v_perm_b32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01] +v_msad_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01] -v_perm_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04] +v_msad_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01] -v_perm_b32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_msad_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01] -v_perm_b32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01] +v_msad_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_perm_b32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01] +v_msad_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: 
[0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03] -v_perm_b32 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01] +v_msad_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03] -v_perm_b32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] +v_msad_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03] -v_perm_b32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_perm_b32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] +v_mul_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] -v_perm_b32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] +v_mul_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00] -v_perm_b32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] +v_mul_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00] -v_perm_b32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_mul_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00] -v_permlane16_b32 v5, v1, s2, s3 -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] +v_mul_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00] -v_permlane16_b32 v5, v1, s105, s105 -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] +v_mul_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_permlane16_b32 v5, v1, ttmp15, ttmp15 -// GFX11: encoding: 
[0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] +v_mul_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20] -v_permlane16_b32 v5, v1, vcc_hi, exec_lo -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01] +v_mul_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00] -v_permlane16_b32 v5, v1, vcc_lo, m0 -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01] +v_mul_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00] -v_permlane16_b32 v5, v1, m0, vcc_hi -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01] +v_mul_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08] -v_permlane16_b32 v5, v1, exec_hi, vcc_lo -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01] +v_mul_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70] -v_permlane16_b32 v5, v1, exec_lo, src_scc -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03] +v_mul_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] -v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1] -// GFX11: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03] +v_mul_hi_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] -v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0] -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03] +v_mul_hi_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00] -v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] -// GFX11: encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01] +v_mul_hi_i32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00] -v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] -// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] +v_mul_hi_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00] 
-v_permlanex16_b32 v5, v1, s2, s3 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] +v_mul_hi_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00] -v_permlanex16_b32 v5, v1, s105, s105 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] +v_mul_hi_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_permlanex16_b32 v5, v1, ttmp15, ttmp15 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] +v_mul_hi_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00] -v_permlanex16_b32 v5, v1, vcc_hi, exec_lo -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01] +v_mul_hi_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00] -v_permlanex16_b32 v5, v1, vcc_lo, m0 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01] +v_mul_hi_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00] -v_permlanex16_b32 v5, v1, m0, vcc_hi -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01] +v_mul_hi_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00] -v_permlanex16_b32 v5, v1, exec_hi, vcc_lo -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01] +v_mul_hi_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00] -v_permlanex16_b32 v5, v1, exec_lo, src_scc -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03] +v_mul_hi_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00] -v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1] -// GFX11: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03] +v_mul_hi_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00] -v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0] -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03] +v_mul_hi_i32 v5, src_scc, vcc_lo +// GFX11: encoding: 
[0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00] -v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] -// GFX11: encoding: [0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01] +v_mul_hi_i32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] -// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] +v_mul_hi_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] -v_pipeflush_e64 -// GFX11: encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] +v_mul_hi_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00] -v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] +v_mul_hi_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] +v_mul_hi_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] +v_mul_hi_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] +v_mul_hi_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] +v_mul_hi_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00] -v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] +v_mul_hi_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00] -v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] -// GFX11: 
encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] +v_mul_hi_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00] -v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] +v_mul_hi_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] +v_mul_hi_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] +v_mul_hi_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], null, null, vcc -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] +v_mul_hi_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +v_mul_hi_u32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00] -v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] +v_mul_hi_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] +v_mul_lo_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] -v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp -// GFX11: encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] +v_mul_lo_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00] + +v_mul_lo_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00] -v_rcp_f16_e64 v5, v1 -// GFX11: encoding: 
[0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] +v_mul_lo_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00] -v_rcp_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] +v_mul_lo_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00] -v_rcp_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] +v_mul_lo_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_rcp_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] +v_mul_lo_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00] -v_rcp_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] +v_mul_lo_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_rcp_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] +v_mul_lo_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00] -v_rcp_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] +v_mul_lo_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00] -v_rcp_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] +v_mul_lo_u16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00] -v_rcp_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] +v_mul_lo_u16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00] -v_rcp_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] +v_mul_lo_u16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_rcp_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] +v_mul_lo_u16 v5, src_scc, vcc_lo +// GFX11: encoding: 
[0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00] -v_rcp_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] +v_mul_lo_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_rcp_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] +v_mul_lo_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] -v_rcp_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] +v_mul_lo_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00] -v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_mul_lo_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00] -v_rcp_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] +v_mul_lo_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00] -v_rcp_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] +v_mul_lo_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00] -v_rcp_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] +v_mul_lo_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_rcp_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] +v_mul_lo_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00] -v_rcp_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] +v_mul_lo_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00] -v_rcp_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] +v_mul_lo_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00] -v_rcp_f32_e64 v5, ttmp15 -// GFX11: encoding: 
[0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] +v_mul_lo_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00] -v_rcp_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] +v_mul_lo_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00] -v_rcp_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] +v_mul_lo_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00] -v_rcp_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] +v_mul_lo_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00] -v_rcp_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] +v_mul_lo_u32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00] -v_rcp_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] +v_mul_lo_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_rcp_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] +v_mullit_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] -v_rcp_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] +v_mullit_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01] -v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_mullit_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] -v_rcp_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] +v_mullit_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] -v_rcp_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] +v_mullit_f32 v5, vcc_lo, ttmp15, 
v3 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] -v_rcp_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] +v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_rcp_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] +v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] -v_rcp_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] +v_mullit_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] -v_rcp_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] +v_mullit_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] -v_rcp_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] +v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] -v_rcp_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] +v_mullit_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_rcp_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] +v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] -v_rcp_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] +v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] -v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] +v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] -v_rcp_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: 
[0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_rcp_iflag_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] +v_or3_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] -v_rcp_iflag_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] +v_or3_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01] -v_rcp_iflag_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] +v_or3_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01] -v_rcp_iflag_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] +v_or3_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01] -v_rcp_iflag_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] +v_or3_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04] -v_rcp_iflag_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] +v_or3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_rcp_iflag_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] +v_or3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01] -v_rcp_iflag_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] +v_or3_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01] -v_rcp_iflag_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] +v_or3_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01] -v_rcp_iflag_f32_e64 v5, exec_hi -// GFX11: encoding: 
[0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] +v_or3_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01] -v_rcp_iflag_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] +v_or3_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_rcp_iflag_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] +v_or3_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03] -v_rcp_iflag_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] +v_or3_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03] -v_rcp_iflag_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] +v_or3_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03] -v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_or3_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_readlane_b32 s5, v1, s2 -// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] +v_or_b16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] -v_readlane_b32 s5, v1, s105 -// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] +v_or_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] -v_readlane_b32 s105, v1, ttmp15 -// GFX11: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] +v_or_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] -v_readlane_b32 vcc_lo, v1, vcc_hi -// GFX11: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] +v_or_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] -v_readlane_b32 vcc_hi, v1, vcc_lo -// GFX11: encoding: 
[0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] +v_or_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] -v_readlane_b32 ttmp15, v1, m0 -// GFX11: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] +v_or_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_readlane_b32 null, v255, null -// GFX11: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] +v_or_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] -v_rndne_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] +v_or_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] -v_rndne_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] +v_or_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] -v_rndne_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] +v_or_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] -v_rndne_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] +v_or_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] -v_rndne_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] +v_or_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] -v_rndne_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] +v_or_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_rndne_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] +v_or_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] -v_rndne_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] +v_or_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: 
[0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_rndne_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] +v_pack_b32_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] -v_rndne_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] +v_pack_b32_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00] -v_rndne_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] +v_pack_b32_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00] -v_rndne_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] +v_pack_b32_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00] -v_rndne_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] +v_pack_b32_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00] -v_rndne_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] +v_pack_b32_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_pack_b32_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00] -v_rndne_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] +v_pack_b32_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00] -v_rndne_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] +v_pack_b32_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00] -v_rndne_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] +v_pack_b32_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00] -v_rndne_f32_e64 v5, s105 
-// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] +v_pack_b32_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00] -v_rndne_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] +v_pack_b32_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00] -v_rndne_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] +v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40] -v_rndne_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] +v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20] -v_rndne_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] +v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_rndne_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] +v_perm_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] -v_rndne_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] +v_perm_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01] -v_rndne_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] +v_perm_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01] -v_rndne_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] +v_perm_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01] -v_rndne_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] +v_perm_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04] -v_rndne_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: 
[0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] +v_perm_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_perm_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01] -v_rndne_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] +v_perm_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01] -v_rndne_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] +v_perm_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01] -v_rndne_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] +v_perm_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] -v_rndne_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] +v_perm_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_rndne_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] +v_perm_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] -v_rndne_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] +v_perm_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] -v_rndne_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] +v_perm_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] -v_rndne_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] +v_perm_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 
-v_rndne_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] -v_rndne_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] +v_permlane16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] -v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] +v_permlane16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] -v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_permlane16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01] -v_rsq_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] +v_permlane16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01] -v_rsq_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] +v_permlane16_b32 v5, v1, m0, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01] -v_rsq_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01] -v_rsq_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, exec_lo, src_scc +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03] -v_rsq_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03] -v_rsq_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03] 
-v_rsq_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01] -v_rsq_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] +v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] -v_rsq_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] -v_rsq_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] -v_rsq_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] -v_rsq_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01] -v_rsq_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] +v_permlanex16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01] -v_rsq_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] +v_permlanex16_b32 v5, v1, m0, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01] -v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_permlanex16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01] -v_rsq_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] +v_permlanex16_b32 v5, v1, exec_lo, src_scc +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03] -v_rsq_f32_e64 
v5, v255 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] +v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03] -v_rsq_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03] -v_rsq_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: encoding: [0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01] -v_rsq_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] +v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] -v_rsq_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] -v_rsq_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] -v_rsq_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] -v_rsq_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] -v_rsq_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] -v_rsq_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX11: encoding: 
[0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] -v_rsq_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] -v_rsq_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] +v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] -v_rsq_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] +v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] -v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] -v_rsq_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] -v_rsq_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_rsq_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] -v_rsq_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] -v_rsq_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX11: encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] -v_rsq_f64_e64 v[5:6], 
ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] +v_readlane_b32 s5, v1, s2 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] -v_rsq_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] +v_readlane_b32 s5, v1, s105 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] -v_rsq_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] +v_readlane_b32 s105, v1, ttmp15 +// GFX11: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] -v_rsq_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] +v_readlane_b32 vcc_lo, v1, vcc_hi +// GFX11: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] -v_rsq_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] +v_readlane_b32 vcc_hi, v1, vcc_lo +// GFX11: encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] -v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] +v_readlane_b32 ttmp15, v1, m0 +// GFX11: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] -v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_readlane_b32 null, v255, null +// GFX11: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] v_sad_hi_u8 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] @@ -8689,267 +5569,6 @@ v_sad_u8 v5, src_scc, vcc_lo, -1 v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp // GFX11: encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_sat_pk_u8_i16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] - 
-v_sat_pk_u8_i16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_sin_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] - -v_sin_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] - -v_sin_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] - -v_sin_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] - -v_sin_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] - -v_sin_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] - -v_sin_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] - -v_sin_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] - -v_sin_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] - -v_sin_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] - -v_sin_f16_e64 v5, null 
-// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] - -v_sin_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] - -v_sin_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] - -v_sin_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] - -v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_sin_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] - -v_sin_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] - -v_sin_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] - -v_sin_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] - -v_sin_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] - -v_sin_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] - -v_sin_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] - -v_sin_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] - -v_sin_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] - -v_sin_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] - -v_sin_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] - -v_sin_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] - -v_sin_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] - -v_sin_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] - -v_sin_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_sqrt_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] - -v_sqrt_f16_e64 v5, v255 -// GFX11: 
encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] - -v_sqrt_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] - -v_sqrt_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] - -v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_sqrt_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] - -v_sqrt_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] - -v_sqrt_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, m0 -// GFX11: encoding: 
[0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] - -v_sqrt_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] - -v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_sqrt_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] - -v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] - -v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - v_sub_co_u32 v5, s6, v1, v2 // W32: encoding: 
[0x05,0x06,0x01,0xd7,0x01,0x05,0x02,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -9360,132 +5979,6 @@ v_trig_preop_f64 v[5:6], -|src_scc|, src_scc mul:4 v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 // GFX11: encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] -v_trunc_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] - -v_trunc_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] - -v_trunc_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] - -v_trunc_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] - -v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_trunc_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] - -v_trunc_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] - -v_trunc_f32_e64 v5, s1 -// GFX11: encoding: 
[0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] - -v_trunc_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] - -v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_trunc_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] - -v_trunc_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] - -v_trunc_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] - -v_trunc_f64_e64 
v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] - -v_trunc_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] - -v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - v_writelane_b32 v5, s1, s2 // GFX11: encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index fb3a7f4524059..dd8f465dc0a5c 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -740,216 +740,6 @@ v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: 
[0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cls_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cls_i32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - 
-v_cls_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - v_cndmask_b16_e64_dpp v5, v1, v2, s3 
quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1057,132 +847,6 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_m v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] -v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cos_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cos_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ 
-1261,893 +925,95 @@ v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0 v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 
row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 
row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f16_f32_e64_dpp 
v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:4 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_i32_e64_dpp v5, v1 
quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: 
[0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror -// GFX11: 
[0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror -// GFX11: 
[0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 -// GFX11: 
[0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 -// GFX11: 
[0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 -// 
GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_floor_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: 
[0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_flr_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: 
[0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - 
-v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 
row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - 
-v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
+v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// 
GFX11: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 -// GFX11: 
[0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// 
GFX11: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -2590,530 +1456,68 @@ v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_cvt_pknorm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: 
[0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - 
-v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 -// GFX11: 
[0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 
row_shr:15 -// GFX11: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] - -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_exp_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 
row_ror:1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_exp_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 
row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_ffbh_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ffbh_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_ffbh_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ffbh_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_ffbh_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ffbh_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_ffbh_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ffbh_u32_e64_dpp 
v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_ffbl_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ffbl_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_ffbl_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ffbl_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: 
[0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_floor_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +v_cvt_pknorm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// 
GFX11: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_floor_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 
-v_floor_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] 
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3199,258 +1603,6 @@ v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_fract_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_fract_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: 
[0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_fract_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_fract_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_fract_f32_e64_dpp v5, 
v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
- -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_frexp_mant_f32_e64_dpp v255, -|v255| 
clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -3535,90 +1687,6 @@ v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_log_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_log_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_log_f16_e64_dpp v5, v1 
mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_log_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5335,263 +3403,53 @@ v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] - -v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -v_minmax_u32_e64_dpp v5, 
v1, v2, exec_lo row_ror:15 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] - -v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_mov_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - 
-v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_movrels_b32_e64_dpp v255, v255 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] -v_movrelsd_2_b32_e64_dpp 
v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] -v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5719,90 +3577,6 @@ v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_m v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_not_b16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// 
GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_not_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_not_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_not_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - v_or3_b32_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5971,300 +3745,6 @@ v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rcp_f16_e64_dpp v255, 
-|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: 
[0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rndne_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: 
[0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rndne_f32_e64_dpp v5, 
v1 row_half_mirror -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - 
-v_rsq_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rsq_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - 
-v_rsq_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -6433,216 +3913,6 @@ v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - 
-v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sin_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: 
[0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sin_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: 
[0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sin_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_share:0 
row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -6983,90 +4253,6 @@ v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_ma v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: 
[0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s new file mode 100644 index 0000000000000..15c0cda5a4232 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s @@ -0,0 +1,2815 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: 
[0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + 
+v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cls_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cls_i32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// 
GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cls_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cos_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] 
+// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cos_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 
row_half_mirror +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: 
[0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 +// GFX11: 
[0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f16_i16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
+ +v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: 
[0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 +// GFX11: 
[0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 
+// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + 
+v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_floor_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 
quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_flr_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] 
+// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + 
+v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 +// GFX11: 
[0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 
row_shr:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 
row_shr:15 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 +// GFX11: 
[0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_exp_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + 
+v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_exp_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_ffbh_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ffbh_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: 
[0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ffbh_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ffbh_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_ffbh_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ffbh_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_half_mirror +// GFX11: 
[0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ffbh_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ffbh_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_ffbl_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ffbl_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: 
[0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ffbl_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ffbl_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_floor_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_floor_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_floor_f32_e64_dpp v5, 
v1 row_ror:15 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_fract_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_fract_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_fract_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_fract_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_log_f16_e64_dpp v5, v1 
quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_log_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_log_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + 
+v_log_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_mov_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + 
+v_mov_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + 
+v_movreld_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: 
[0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: 
[0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_not_b16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 
row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_not_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_not_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_not_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + 
+v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: 
[0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rndne_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_mirror +// GFX11: 
[0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + 
+v_rndne_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: 
[0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rsq_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + 
+v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sin_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sin_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sin_f32_e64_dpp 
v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + 
+v_trunc_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 6440cd4c9b844..5742817e63801 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -387,57 +387,6 @@ v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 
-v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa2,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - 
-v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_clz_i32_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -477,39 +426,6 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] -v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cos_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xe1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cos_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cos_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xb6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: 
[0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -654,210 +570,6 @@ v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x0e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x8a,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0xd1,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:4 
dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0xd0,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x8b,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x85,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x85,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x86,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x91,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x92,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x92,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x93,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x93,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: 
[0x05,0x00,0x94,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x94,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_flr_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x88,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - 
-v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xe3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xe4,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x8e,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1008,42 +720,6 @@ v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd2,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x87,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_u16_e64_dpp v255, 
v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1080,81 +756,6 @@ v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_exp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_exp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd8,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_ffbh_u32_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbl_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1227,72 +828,6 @@ 
v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x13,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdf,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_fract_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xda,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: 
[0x05,0x00,0xbf,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xbf,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xc0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1341,30 +876,6 @@ v_lerp_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x15,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_log_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_log_f16_e64_dpp v5, v1 mul:4 
dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2637,51 +2148,6 @@ v_minmax_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x63,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// 
GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2763,24 +2229,6 @@ v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x18,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2874,90 +2322,6 @@ v_perm_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x44,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xaa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xab,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rndne_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xde,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rsq_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - 
-v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rsq_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xae,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3102,63 +2466,6 @@ v_sad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x22,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xe0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sin_f32_e64_dpp v5, v1 mul:2 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xb5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xb3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3264,30 +2571,6 @@ v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x02,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_trunc_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s new file mode 100644 index 0000000000000..f8478673e4fc2 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s @@ -0,0 +1,718 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_ceil_f16_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa2,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_clz_i32_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cos_f16_e64_dpp v5, v1 mul:2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xe1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cos_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cos_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xb6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x8a,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + 
+v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0xd1,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0xd0,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x8b,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 
+// GFX11: [0xff,0x80,0x85,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x86,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x91,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x92,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x93,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x94,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_flr_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x00,0xd3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x88,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_norm_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xe3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + 
+v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xe4,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x8e,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd2,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 
+// GFX11: [0xff,0x81,0x87,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_exp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_exp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd8,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_ffbh_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbl_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdf,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_fract_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xda,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xbf,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xc0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_log_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_log_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + 
+v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + 
+v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xaa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + 
+v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xab,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rndne_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xde,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rsq_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: 
[0xff,0x81,0xd6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rsq_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xae,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xe0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sin_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] 
fi:0 +// GFX11: [0xff,0x81,0xb5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xb3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_trunc_f32_e64_dpp v5, v1 mul:4 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s new file mode 100644 index 0000000000000..a67cb0bf4cf85 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s @@ -0,0 +1,3508 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +v_bfrev_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] + +v_bfrev_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] + +v_bfrev_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] + +v_bfrev_b32_e64 v255, 0xaf123456 +// GFX11: encoding: 
[0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ceil_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] + +v_ceil_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] + +v_ceil_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] + +v_ceil_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] + +v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_ceil_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] + +v_ceil_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] + +v_ceil_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] + +v_ceil_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] + +v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_ceil_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] + +v_ceil_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] + +v_ceil_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] + +v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] + 
+v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cls_i32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] + +v_cls_i32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] + +v_cls_i32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] + +v_cls_i32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] + +v_cls_i32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] + +v_cls_i32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] + +v_cls_i32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] + +v_cls_i32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] + +v_cls_i32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] + +v_cls_i32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] + +v_cls_i32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] + +v_cls_i32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] + +v_cls_i32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] + +v_cls_i32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] + +v_cls_i32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_clz_i32_u32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] + +v_clz_i32_u32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] + +v_clz_i32_u32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, vcc_hi 
+// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cos_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] + +v_cos_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] + +v_cos_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] + +v_cos_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] + +v_cos_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] + +v_cos_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] + +v_cos_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] + +v_cos_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] + +v_cos_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] + +v_cos_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] + +v_cos_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] + +v_cos_f16_e64 v5, -1 +// GFX11: encoding: 
[0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] + +v_cos_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] + +v_cos_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] + +v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_cos_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] + +v_cos_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] + +v_cos_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] + +v_cos_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] + +v_cos_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] + +v_cos_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] + +v_cos_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] + +v_cos_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] + +v_cos_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] + +v_cos_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] + +v_cos_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] + +v_cos_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] + +v_cos_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] + +v_cos_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] + +v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_ctz_i32_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] + +v_ctz_i32_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] + +v_ctz_i32_b32_e64 v5, s1 +// GFX11: encoding: 
[0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_f16_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f16_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f16_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, exec_lo +// GFX11: encoding: 
[0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f16_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_cvt_f16_i16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f16_i16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f16_i16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] + +v_cvt_f16_i16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] + 
+v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 +// GFX11: encoding: [0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] + +v_cvt_f16_u16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f16_u16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f16_u16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] + +v_cvt_f16_u16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2 +// GFX11: encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] + +v_cvt_f32_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, vcc_lo +// 
GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_cvt_f32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] + +v_cvt_f32_f64_e64 v5, s[2:3] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, s[104:105] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, 0.5 mul:2 +// GFX11: encoding: 
[0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] + +v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_i32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_i32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_i32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_i32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_u32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_u32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_u32_e64 v5, s1 +// GFX11: encoding: 
[0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_u32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte0_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] + 
+v_cvt_f32_ubyte0_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte0_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte1_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] + 
+v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte2_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte3_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, s1 +// GFX11: encoding: 
[0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f64_f32_e64 v[5:6], v1 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], v255 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], s1 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], s105 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], vcc_lo +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], vcc_hi +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], ttmp15 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], m0 +// GFX11: encoding: 
[0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], exec_lo +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], exec_hi +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_cvt_f64_i32_e64 v[5:6], v1 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], v255 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], s1 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], s105 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], vcc_lo +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], vcc_hi +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], ttmp15 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], m0 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], exec_lo +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], exec_hi +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 +// 
GFX11: encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f64_u32_e64 v[5:6], v1 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], v255 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], s1 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], s105 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], vcc_lo +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], vcc_hi +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], ttmp15 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], m0 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], exec_lo +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], exec_hi +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_floor_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, v255 +// GFX11: encoding: 
[0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_flr_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, ttmp15 +// GFX11: 
encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_i16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_i16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] + 
+v_cvt_i16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp +// GFX11: encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp +// GFX11: encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_i32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] + +v_cvt_i32_f64_e64 v5, s[2:3] +// GFX11: encoding: 
[0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, s[104:105] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, -|src_scc| +// GFX11: encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] + +v_cvt_i32_f64_e64 v255, 0xaf123456 clamp +// GFX11: encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_i32_i16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i32_i16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_i32_i16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, -1 +// GFX11: 
encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_i32_i16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_norm_i16_f16_e64 v5, v1 +// GFX11: encoding: 
[0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| +// GFX11: encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| +// GFX11: encoding: [0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, null +// GFX11: encoding: 
[0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_off_f32_i4_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 +// GFX11: encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_u16_f16_e64 v5, v1 +// 
GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_u16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp +// GFX11: encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_u32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_u32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, ttmp15 +// 
GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp +// GFX11: encoding: [0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_u32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] + +v_cvt_u32_f64_e64 v5, s[2:3] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, s[104:105] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, -|src_scc| +// GFX11: encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] + +v_cvt_u32_f64_e64 v255, 0xaf123456 clamp +// GFX11: encoding: 
[0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_u32_u16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u32_u16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_u32_u16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_u32_u16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_exp_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] + +v_exp_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] + +v_exp_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] + +v_exp_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] + +v_exp_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] + +v_exp_f16_e64 v5, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] + +v_exp_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] + +v_exp_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] + +v_exp_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] + +v_exp_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] + +v_exp_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] + +v_exp_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] + +v_exp_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] + +v_exp_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] + +v_exp_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_exp_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] + +v_exp_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] + +v_exp_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] + +v_exp_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] + +v_exp_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] + +v_exp_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] + +v_exp_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] + +v_exp_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] + +v_exp_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] + +v_exp_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] + +v_exp_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] + +v_exp_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] + +v_exp_f32_e64 v5, 0.5 
mul:2 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] + +v_exp_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] + +v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_ffbh_i32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] + +v_ffbh_i32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] + +v_ffbh_i32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] + +v_ffbh_i32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ffbh_u32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] + +v_ffbh_u32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] + +v_ffbh_u32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, s105 +// GFX11: encoding: 
[0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] + +v_ffbh_u32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ffbl_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] + +v_ffbl_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] + +v_ffbl_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, 
null +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] + +v_ffbl_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_floor_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] + +v_floor_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] + +v_floor_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] + +v_floor_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] + +v_floor_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] + +v_floor_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] + +v_floor_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] + +v_floor_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] + +v_floor_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] + +v_floor_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] + +v_floor_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] + +v_floor_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] + +v_floor_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] + +v_floor_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] + +v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_floor_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] + +v_floor_f32_e64 v5, v255 +// 
GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] + +v_floor_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] + +v_floor_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] + +v_floor_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] + +v_floor_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] + +v_floor_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] + +v_floor_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] + +v_floor_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] + +v_floor_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] + +v_floor_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] + +v_floor_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] + +v_floor_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] + +v_floor_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] + +v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_floor_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] + +v_floor_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] + +v_floor_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] + 
+v_floor_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] + +v_floor_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] + +v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_fract_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] + +v_fract_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] + +v_fract_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] + +v_fract_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] + +v_fract_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] + +v_fract_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] + +v_fract_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] + +v_fract_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] + +v_fract_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] + +v_fract_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] + +v_fract_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] + +v_fract_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] + +v_fract_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] + +v_fract_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] + +v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_fract_f32_e64 v5, v1 +// GFX11: encoding: 
[0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] + +v_fract_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] + +v_fract_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] + +v_fract_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] + +v_fract_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] + +v_fract_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] + +v_fract_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] + +v_fract_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] + +v_fract_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] + +v_fract_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] + +v_fract_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] + +v_fract_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] + +v_fract_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] + +v_fract_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] + +v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_fract_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] + +v_fract_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] + +v_fract_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], 
exec +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] + +v_fract_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] + +v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_frexp_exp_i16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] + 
+v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| +// GFX11: encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_frexp_exp_i32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, s[2:3] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, s[104:105] +// 
GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, -|src_scc| +// GFX11: encoding: [0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] + +v_frexp_exp_i32_f64_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_frexp_mant_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_mant_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_mant_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, -1 +// GFX11: 
encoding: [0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] + +v_frexp_mant_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] + +v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_frexp_mant_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_mant_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_mant_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] + +v_frexp_mant_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] + +v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_frexp_mant_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] + 
+v_frexp_mant_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] + +v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] + +v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_log_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] + +v_log_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] + +v_log_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] + +v_log_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] + +v_log_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] + +v_log_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] + +v_log_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] + +v_log_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] + +v_log_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] + +v_log_f16_e64 v5, 
exec_hi +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] + +v_log_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] + +v_log_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] + +v_log_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] + +v_log_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] + +v_log_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_log_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] + +v_log_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] + +v_log_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] + +v_log_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] + +v_log_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] + +v_log_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] + +v_log_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] + +v_log_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] + +v_log_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] + +v_log_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] + +v_log_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] + +v_log_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] + +v_log_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] + +v_log_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] + +v_log_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_mov_b32_e64 v5, v1 +// GFX11: 
encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] + +v_mov_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] + +v_mov_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] + +v_mov_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] + +v_mov_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] + +v_mov_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] + +v_mov_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] + +v_mov_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] + +v_mov_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] + +v_mov_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] + +v_mov_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] + +v_mov_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] + +v_mov_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] + +v_mov_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_movreld_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] + +v_movreld_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] + +v_movreld_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] + 
+v_movreld_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] + +v_movreld_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_movrels_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] + +v_movrels_b32_e64 v255, v255 +// GFX11: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] + +v_movrelsd_2_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] + +v_movrelsd_2_b32_e64 v255, v255 +// GFX11: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] + +v_movrelsd_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] + +v_movrelsd_b32_e64 v255, v255 +// GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] + +v_nop_e64 +// GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] + +v_not_b16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] + +v_not_b16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] + +v_not_b16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] + +v_not_b16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] + +v_not_b16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] + +v_not_b16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] + +v_not_b16_e64 v5, ttmp15 +// GFX11: encoding: 
[0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] + +v_not_b16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] + +v_not_b16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] + +v_not_b16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] + +v_not_b16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] + +v_not_b16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] + +v_not_b16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_not_b16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] + +v_not_b16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_not_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] + +v_not_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] + +v_not_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] + +v_not_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] + +v_not_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] + +v_not_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] + +v_not_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] + +v_not_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] + +v_not_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] + +v_not_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] + +v_not_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] + +v_not_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] + +v_not_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] + +v_not_b32_e64 v5, src_scc +// 
GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] + +v_not_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_pipeflush_e64 +// GFX11: encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] + +v_rcp_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] + +v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_rcp_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] + +v_rcp_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, s105 +// GFX11: encoding: 
[0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] + +v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rcp_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] + +v_rcp_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: 
[0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] + +v_rcp_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_rcp_iflag_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_iflag_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] + +v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rndne_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] + +v_rndne_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] + +v_rndne_f16_e64 v5, s1 +// GFX11: encoding: 
[0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] + +v_rndne_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] + +v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_rndne_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] + +v_rndne_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] + +v_rndne_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, exec_lo +// GFX11: encoding: 
[0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] + +v_rndne_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] + +v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rndne_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] + +v_rndne_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] + +v_rndne_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] + +v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] + +v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_rsq_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] + +v_rsq_f16_e64 v5, v255 +// GFX11: encoding: 
[0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] + +v_rsq_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] + +v_rsq_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] + +v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_rsq_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] + +v_rsq_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] + +v_rsq_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, exec_lo 
+// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] + +v_rsq_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] + +v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rsq_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] + +v_rsq_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] + +v_rsq_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] + +v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] + +v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_sat_pk_u8_i16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, v255 +// GFX11: encoding: 
[0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sin_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] + +v_sin_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] + +v_sin_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] + +v_sin_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] + +v_sin_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] + +v_sin_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] + +v_sin_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] + +v_sin_f16_e64 v5, m0 +// GFX11: encoding: 
[0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] + +v_sin_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] + +v_sin_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] + +v_sin_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] + +v_sin_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] + +v_sin_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] + +v_sin_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] + +v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_sin_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] + +v_sin_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] + +v_sin_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] + +v_sin_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] + +v_sin_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] + +v_sin_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] + +v_sin_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] + +v_sin_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] + +v_sin_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] + +v_sin_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] + +v_sin_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] + +v_sin_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] + +v_sin_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] + +v_sin_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] + 
+v_sin_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_sqrt_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] + +v_sqrt_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] + +v_sqrt_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] + +v_sqrt_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] + +v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_sqrt_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] + +v_sqrt_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] + +v_sqrt_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] + 
+v_sqrt_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] + +v_sqrt_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] + +v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_sqrt_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] + +v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: 
[0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] + +v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_trunc_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] + +v_trunc_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] + +v_trunc_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] + +v_trunc_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] + +v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_trunc_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] + +v_trunc_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] + +v_trunc_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, vcc_lo +// 
GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] + +v_trunc_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] + +v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_trunc_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] + +v_trunc_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] + +v_trunc_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: 
[0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] + +v_trunc_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] + +v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 97b7cc254fc81..67ecd5da929bf 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -798,267 +798,6 @@ # GFX11: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_bfrev_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, exec_hi ; 
encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, null ; encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_ceil_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: 
v_ceil_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, null ; encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_ceil_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] 
-0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, null ; encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_ceil_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_ceil_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] 
-0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_cls_i32_e64 v5, v1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, v255 ; encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, s1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, s105 ; encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, m0 ; encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, null 
; encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, -1 ; encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, src_scc ; encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_clz_i32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] 
-0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, null ; encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - # W32: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] # W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] 0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00 @@ -1110,141 +849,6 @@ # GFX11: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cos_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: 
v_cos_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, null ; encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_cos_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] 
-0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, null ; encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_cos_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_ctz_i32_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, 
vcc_hi ; encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, null ; encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00 @@ -1425,6465 +1029,3966 @@ # GFX11: v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00 -# 
GFX11: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: 
[0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: 
v_cvt_f16_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, v255, v255 ; encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, s1, s2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, s105, s105 ; encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, m0 ; encoding: 
[0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, exec_hi, null ; encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, null ; encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, null, exec_lo ; encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, 0x3800 mul:2 -0x05,0x00,0xd1,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_i16_i32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_i16_i32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] -0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 +# 
GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] 
-0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, null ; encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, -1 ; encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, 0x3800 mul:2 -0x05,0x00,0xd0,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f16_u16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| 
op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] -0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, s105 ; encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] 
-0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, m0 ; encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, null ; encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, -1 ; encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] 
-0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 
v5, vcc ; encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, null ; encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 ; encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30 +# GFX11: v_cvt_pk_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 ; 
encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, v1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_i32_e64 v5, v255 ; encoding: [0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_i32_e64 v5, s1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_i32_e64 v5, s105 ; encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, m0 ; encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, null ; encoding: [0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, -1 ; encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_u16_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] 
-0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_u16_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_u16_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, vcc_hi ; encoding: 
[0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_f32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, null ; encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, 0.5 mul:2 ; encoding: 
[0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_f32_ubyte0_e64 v5, v1 ; encoding: [0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_f32_ubyte0_e64 v5, v255 ; encoding: [0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_f32_ubyte0_e64 v5, s1 ; encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null ; encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] +0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte0_e64 v5, s105 ; encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, v2 ; encoding: 
[0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, m0 ; encoding: [0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_lo ; encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_hi ; encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, null ; encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, m0, 0.5 ; encoding: 
[0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, -1 ; encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pknorm_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pknorm_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pknorm_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, v1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_ubyte1_e64 v5, v255 ; encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_ubyte1_e64 v5, s1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v255, 
-|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte1_e64 v5, s105 ; encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, m0 ; encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_lo ; encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_hi ; encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] 
-0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, null ; encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, -1 ; encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pknorm_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pknorm_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pknorm_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v5, v1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_ubyte2_e64 v5, v255 ; encoding: 
[0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_ubyte2_e64 v5, s1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte2_e64 v5, s105 ; encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, m0 ; encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_lo ; 
encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_hi ; encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_f32_ubyte2_e64 v5, null ; encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, -1 ; encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_div_fixup_f16 v5, -1, 
-|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_f32_ubyte3_e64 v5, v1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_cvt_f32_ubyte3_e64 v5, v255 ; encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_f32_ubyte3_e64 v5, s1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte3_e64 v5, s105 ; encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] 
-0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, m0 ; encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_lo ; encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_hi ; encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_f32_ubyte3_e64 v5, null ; encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, -1 ; encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 ; encoding: 
[0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_f64_f32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_f64_f32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_cvt_f64_f32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_f32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], 
v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] +0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00 -# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] +0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07 -# GFX11: v_cvt_f64_f32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] +0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1 -# GFX11: v_cvt_f64_f32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] +0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1 -# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] +0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1 -# GFX11: v_cvt_f64_f32_e64 v[5:6], null ; encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], null, 0.5, vcc ; encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] +0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01 -# GFX11: v_cvt_f64_f32_e64 v[5:6], -1 ; encoding: 
[0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] +0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b -# GFX11: v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] +0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73 -# GFX11: v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 ; encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_i32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_i32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, v255, src_scc, src_scc ; encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] +0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03 -# GFX11: v_cvt_f64_i32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] 
-0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] +0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01 -# GFX11: v_cvt_f64_i32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04 -# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi ; encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] +0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01 -# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 ; encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] +0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61 -# GFX11: v_cvt_f64_i32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, m0, 0.5, v255 ; encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] +0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07 -# GFX11: v_cvt_f64_i32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| ; encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] +0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1 -# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| ; encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] +0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1 -# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] 
-0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, null, m0, -|m0| ; encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] +0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81 -# GFX11: v_cvt_f64_i32_e64 v[5:6], null ; encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| ; encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] +0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1 -# GFX11: v_cvt_f64_i32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 ; encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] +0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b -# GFX11: v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 ; encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] +0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13 -# GFX11: v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 ; encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] +0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_u32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_u32_e64 v[5:6], v255 ; encoding: 
[0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] +0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04 -# GFX11: v_cvt_f64_u32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] ; encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] +0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01 -# GFX11: v_cvt_f64_u32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| ; encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] +0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7 -# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] +0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1 -# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null ; encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] +0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61 -# GFX11: v_cvt_f64_u32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], null, 0.5, -src_scc ; encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] +0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83 -# GFX11: v_cvt_f64_u32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -1, -exec, |exec| ; encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] 
+0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41 -# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 ; encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] +0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9 -# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4 ; encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] +0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33 -# GFX11: v_cvt_f64_u32_e64 v[5:6], null ; encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 ; encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_u32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +# W64: v_div_scale_f32 v5, vcc, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08 +# W32: v_div_scale_f32 v5, vcc_lo, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +# W64: v_div_scale_f32 v5, vcc, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10 +# W32: v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +# W64: 
v_div_scale_f32 v5, vcc, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# W32: v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +# W64: v_div_scale_f32 v5, vcc, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_floor_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +# W64: v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_floor_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_floor_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +# W64: v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_floor_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00 
+# W32: v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +# W64: v_div_scale_f32 v5, vcc, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] +# W64: v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64: v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_floor_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_floor_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64: v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_lo ; encoding: 
[0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64: v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 ; encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +# W64: v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 ; encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_cvt_floor_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00 +# W32: v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 ; encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 ; encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_floor_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_floor_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +# W64: v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] ; 
encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00 -# GFX11: v_cvt_floor_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +# W64: v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07 -# GFX11: v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf +# W32: v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +# W64: v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1 -# GFX11: v_cvt_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +# W64: v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1 -# GFX11: v_cvt_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null ; encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null ; encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] 
-0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +# W64: v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1 -# GFX11: v_cvt_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] +# W64: v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] +0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01 -# GFX11: v_cvt_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +# W64: v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b -# GFX11: v_cvt_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 ; encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +# W64: v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 ; encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73 -# GFX11: 
v_cvt_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01 -# GFX11: v_cvt_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04 -# GFX11: v_cvt_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01 -# GFX11: v_cvt_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07 -# GFX11: v_cvt_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: 
v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp ; encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81 -# GFX11: v_cvt_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1 -# GFX11: v_cvt_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43 -# GFX11: v_cvt_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1 -# GFX11: v_cvt_i32_f32_e64 
v5, vcc_hi ; encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: 
v_cvt_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp ; encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf +# GFX11: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, -1, -|exec_hi|, 
-|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_cvt_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_i32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] -0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20 +# GFX11: 
v_fma_dx9_zero_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_i32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_i32_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i32_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_i32_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_i32_i16_e64 v5, vcc_hi ; encoding: 
[0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i32_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_i32_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_i32_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_cvt_i32_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i32_i16_e64 v5, null ; encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i32_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] 
+0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, 0x3800 -0x05,0x00,0xea,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_i32_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] 
+0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, null ; encoding: 
[0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf +# GFX11: v_fma_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_norm_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_fma_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: 
v_cvt_norm_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_norm_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_norm_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_norm_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33 -# 
GFX11: v_cvt_norm_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_norm_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] +0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00 -# GFX11: v_cvt_norm_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] +0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07 -# GFX11: v_cvt_norm_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] +0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1 -# GFX11: v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] +0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1 -# GFX11: v_cvt_norm_u16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00 +# GFX11: 
v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] +0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1 -# GFX11: v_cvt_norm_u16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], null, 0.5, vcc ; encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] +0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01 -# GFX11: v_cvt_norm_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] +0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b -# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] +0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73 -# GFX11: v_cvt_norm_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf -# 
GFX11: v_cvt_norm_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| ; encoding: 
[0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 +# GFX11: v_ldexp_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, v1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_ldexp_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, v255 ; encoding: [0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_ldexp_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, s1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, s105 ; encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08] +0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08 -# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10] +0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10 -# GFX11: v_cvt_off_f32_i4_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] 
-0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_off_f32_i4_e64 v5, m0 ; encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v255 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], s2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, null ; encoding: [0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], s105 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, -1 ; encoding: [0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[254:255], ttmp15 ; encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00] +0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_ldexp_f64 v[5:6], s[2:3], vcc_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] 
-0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_ldexp_f64 v[5:6], s[104:105], vcc_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 ; encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] -0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], vcc, m0 ; encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_ldexp_f64 v[5:6], exec, exec_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], null, null ; encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00] +0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2 ; encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08] +0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08 -# GFX11: v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: 
[0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4 ; encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30] +0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30 -# GFX11: v_cvt_pk_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_lerp_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_lerp_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_pk_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_lerp_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_pk_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_lerp_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_pk_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_lerp_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: 
v_cvt_pk_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_lerp_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: v_lerp_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf +# GFX11: v_lerp_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_lerp_u8 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, v255, v255 ; encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_lerp_u8 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, s1, s2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_lerp_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_i32 v5, s105, s105 ; encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_lerp_u8 v5, -1, exec_hi, src_scc ; encoding: 
[0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_lerp_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_lerp_u8 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_pk_i16_i32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_lerp_u8 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_i32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_lshl_add_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_pk_i16_i32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_lshl_add_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, exec_hi, null ; encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, null, exec_lo ; encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: 
v_lshl_add_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_pk_i16_i32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_i32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_lshl_add_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_lshl_add_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_lshl_add_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: 
[0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_norm_i16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_lshl_add_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_norm_i16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_lshl_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_lshl_or_b32 v5, v255, s2, s105 ; encoding: 
[0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_pk_norm_i16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_lshl_or_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: 
[0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_lshl_or_b32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_lshl_or_b32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_norm_u16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_lshl_or_b32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null ; encoding: 
[0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_norm_u16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_lshlrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_lshlrev_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_lshlrev_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_lshlrev_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_lshlrev_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_lshlrev_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: 
v_lshlrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 +# GFX11: v_lshlrev_b16 v5, m0, 0x3800 +0x05,0x00,0x38,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_lshlrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_lshlrev_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_lshlrev_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_lshlrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_lshlrev_b16 v5, 0x3800, m0 +0x05,0x00,0x38,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_lshlrev_b16 v5, src_scc, vcc_lo ; encoding: 
[0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_lshlrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_lshlrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_lshlrev_b64 v[5:6], v255, exec ; encoding: [0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00] +0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_lshlrev_b64 v[5:6], exec_lo, v[2:3] ; encoding: [0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_lshlrev_b64 v[5:6], exec_hi, v[254:255] ; encoding: [0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00] +0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_lshlrev_b64 v[5:6], null, null ; encoding: [0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00] +0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_lshlrev_b64 v[5:6], -1, -1 ; encoding: 
[0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00] +0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00 -# GFX11: v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: v_lshlrev_b64 v[5:6], 0.5, 0xaf123456 ; encoding: [0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf +# GFX11: v_lshlrev_b64 v[5:6], src_scc, src_scc ; encoding: [0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00] +0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_lshlrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] +0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_u16_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_lshrrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_lshrrev_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_lshrrev_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] 
-0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_lshrrev_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_lshrrev_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_lshrrev_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_lshrrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_lshrrev_b16 v5, m0, 0x3800 +0x05,0x00,0x39,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_lshrrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_lshrrev_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00 
+# GFX11: v_lshrrev_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_lshrrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_lshrrev_b16 v5, 0x3800, m0 +0x05,0x00,0x39,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_lshrrev_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_lshrrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_lshrrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] +0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_lshrrev_b64 v[5:6], v255, exec ; encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00] +0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_lshrrev_b64 
v[5:6], exec_lo, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00] +0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_lshrrev_b64 v[5:6], exec_hi, v[254:255] ; encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00] +0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_lshrrev_b64 v[5:6], null, null ; encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00] +0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_lshrrev_b64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00] +0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_lshrrev_b64 v[5:6], 0.5, 0xaf123456 ; encoding: [0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_lshrrev_b64 v[5:6], src_scc, src_scc ; encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00] +0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] +0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_u8_f32 v5, null, 
exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null ; encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] -0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf +# GFX11: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_pknorm_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pknorm_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: 
[0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_pknorm_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mad_i16 v5, m0, 0x3800, m0 +0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_pknorm_i16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_pknorm_i16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_pknorm_i16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; 
encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_pknorm_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pknorm_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_pknorm_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_mad_i32_i16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00] +0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00 -# GFX11: v_cvt_pknorm_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_mad_i32_i16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07] +0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07 -# GFX11: v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: v_mad_i32_i16 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01] +0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01 -# GFX11: v_cvt_pknorm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf +# GFX11: v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo ; encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01] +0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, v1, v2 ; encoding: 
[0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pknorm_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mad_i32_i16 v5, m0, 0x3800, m0 +0x05,0x00,0x5a,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_mad_i32_i16 v5, exec_lo, -1, exec_hi ; encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01] +0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_mad_i32_i16 v5, exec_hi, null, exec_lo ; encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01] +0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_mad_i32_i16 v5, null, exec_lo, null ; encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01] +0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456 ; encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] 
+0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pknorm_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_mad_i32_i16 v5, 0x3800, m0, -1 +0x05,0x00,0x5a,0xd6,0xf0,0xfa,0x04,0x03 -# GFX11: v_cvt_pknorm_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03] +0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03 -# GFX11: v_cvt_pknorm_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] +0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_pknorm_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_pknorm_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_mad_i32_i24 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: v_mad_i32_i24 v5, s105, s105, exec_lo ; encoding: 
[0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf +# GFX11: v_mad_i32_i24 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_u16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_u16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_u16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, 
null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_u16_f16_e64 v5, null ; encoding: [0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x18,0x00] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x18,0x00] +0x05,0x0c,0xff,0xd6,0x01,0x05,0x18,0x00 -# GFX11: v_cvt_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, v255, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0xff,0xf7,0x18,0x00] +# W64: v_mad_i64_i32 v[5:6], s[12:13], v255, ttmp15, 
s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0xff,0xf7,0x18,0x00] +0x05,0x0c,0xff,0xd6,0xff,0xf7,0x18,0x00 -# GFX11: v_cvt_u16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x04,0x1a,0x00] +# W64: v_mad_i64_i32 v[5:6], s[12:13], s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x04,0x1a,0x00] +0x05,0x0c,0xff,0xd6,0x01,0x04,0x1a,0x00 -# GFX11: v_cvt_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00] +# W64: v_mad_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00] +0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00 -# GFX11: v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp ; encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, vcc_lo, v255, s[104:105] ; encoding: [0x05,0x0c,0xff,0xd6,0x6a,0xfe,0xa3,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], vcc_lo, v255, s[104:105] ; encoding: [0x05,0x0c,0xff,0xd6,0x6a,0xfe,0xa3,0x01] +0x05,0x0c,0xff,0xd6,0x6a,0xfe,0xa3,0x01 -# GFX11: v_cvt_u32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] +# W64: v_mad_i64_i32 v[5:6], s[12:13], vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] +0x05,0x0c,0xff,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_u32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, ttmp15, src_scc, 
v[254:255] ; encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xfa,0xf9,0x07] +# W64: v_mad_i64_i32 v[5:6], s[12:13], ttmp15, src_scc, v[254:255] ; encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xfa,0xf9,0x07] +0x05,0x0c,0xff,0xd6,0x7b,0xfa,0xf9,0x07 -# GFX11: v_cvt_u32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01] +0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01 -# GFX11: v_cvt_u32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, exec_lo, -1, exec ; encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec ; encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01] +0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01 -# GFX11: v_cvt_u32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, exec_hi, null, vcc ; encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], exec_hi, null, vcc ; encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_u32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s12, null, exec_lo, null ; encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01] +# W64: v_mad_i64_i32 v[5:6], s[12:13], null, exec_lo, null ; encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01] +0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01 -# GFX11: v_cvt_u32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], s104, -1, exec_hi, -1 ; 
encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03] +# W64: v_mad_i64_i32 v[5:6], s[104:105], -1, exec_hi, -1 ; encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03] +0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03 -# GFX11: v_cvt_u32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], vcc_lo, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +# W64: v_mad_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_u32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00 +# W32: v_mad_i64_i32 v[5:6], ttmp14, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03] +# W64: v_mad_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc ; encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03] +0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03 -# GFX11: v_cvt_u32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] +0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_u32_f32_e64 v5, null ; encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_u32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_u32_f32_e64 v5, 0.5 ; encoding: 
[0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_u32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp ; encoding: [0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf +# GFX11: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_u32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_mad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_u32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_u32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, m0, 0x3800, m0 +0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_u32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_u32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] 
-0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_u32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_u32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_u32_f64_e64 v5, null ; encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_u32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_u32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_u32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] -0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20 +# GFX11: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: 
[0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_mad_u32_u16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00] +0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00 -# GFX11: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07] +0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07 -# GFX11: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01] +0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01 -# GFX11: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo ; encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01] +0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01 -# GFX11: v_cvt_u32_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_u32_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_u32_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, m0, 0x3800, m0 +0x05,0x00,0x59,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_u32_u16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] 
-0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, exec_lo, -1, exec_hi ; encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01] +0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01 -# GFX11: v_cvt_u32_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, exec_hi, null, exec_lo ; encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01] +0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01 -# GFX11: v_cvt_u32_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, null, exec_lo, null ; encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01] +0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01 -# GFX11: v_cvt_u32_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456 ; encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_u32_u16_e64 v5, null ; encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, 0x3800, m0, -1 +0x05,0x00,0x59,0xd6,0xf0,0xfa,0x04,0x03 -# GFX11: v_cvt_u32_u16_e64 v5, -1 ; encoding: [0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03] +0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03 -# GFX11: v_cvt_u32_u16_e64 v5, 0x3800 -0x05,0x00,0xeb,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] +0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_u32_u16_e64 v5, src_scc ; encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00 +# 
GFX11: v_mad_u32_u24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_mad_u32_u24 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_mad_u32_u24 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_mad_u32_u24 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_mad_u32_u24 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: 
[0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_mad_u32_u24 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: v_mad_u32_u24 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_mad_u32_u24 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43 - -# GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: 
[0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_div_fixup_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_div_fixup_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_div_fixup_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_div_fixup_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: 
v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04 - -# GFX11: v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] -0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00 - -# GFX11: v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] -0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07 - -# GFX11: v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] -0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1 - -# GFX11: v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] -0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1 - -# GFX11: v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] -0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1 - -# GFX11: v_div_fixup_f64 v[5:6], null, 0.5, vcc ; encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] -0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01 - -# GFX11: v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] 
-0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] -0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b - -# GFX11: v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] -0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73 - -# GFX11: v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f32 v5, v255, src_scc, src_scc ; encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] -0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03 - -# GFX11: v_div_fmas_f32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] -0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01 - -# GFX11: v_div_fmas_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] -0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04 - -# GFX11: v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi ; encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] -0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01 - -# GFX11: v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 ; encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] -0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61 - -# GFX11: v_div_fmas_f32 v5, m0, 0.5, v255 ; encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] -0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07 - -# GFX11: v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| ; encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] -0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1 - -# GFX11: v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| ; encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] -0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1 - -# GFX11: 
v_div_fmas_f32 v5, null, m0, -|m0| ; encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] -0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81 - -# GFX11: v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| ; encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] -0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1 - -# GFX11: v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 ; encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] -0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b - -# GFX11: v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 ; encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] -0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13 - -# GFX11: v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 ; encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] -0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] -0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04 - -# GFX11: v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] ; encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] -0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01 - -# GFX11: v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| ; encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] -0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7 - -# GFX11: v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] -0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1 - -# GFX11: v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null ; encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] -0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61 - -# GFX11: v_div_fmas_f64 v[5:6], null, 0.5, -src_scc ; encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] -0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83 - -# GFX11: v_div_fmas_f64 v[5:6], -1, 
-exec, |exec| ; encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] -0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41 - -# GFX11: v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 ; encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] -0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9 - -# GFX11: v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4 ; encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] -0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33 - -# GFX11: v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 ; encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f32 v5, vcc_lo, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -# W64: v_div_scale_f32 v5, vcc, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00 - -# W32: v_div_scale_f32 v5, vcc_lo, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -# W64: v_div_scale_f32 v5, vcc, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -# W64: v_div_scale_f32 v5, vcc, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -# W64: v_div_scale_f32 v5, vcc, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -# W64: v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04 - -# W32: v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 ; encoding: 
[0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -# W64: v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1 - -# W32: v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -# W64: v_div_scale_f32 v5, vcc, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -# W64: v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -# W64: v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1 - -# W32: v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -# W64: v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3 - -# W32: v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 ; encoding: 
[0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -# W64: v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b - -# W32: v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 ; encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -# W64: v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 ; encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33 - -# W32: v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 ; encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 ; encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -# W64: v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -# W64: v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -# W64: v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -# W64: v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1 - -# W32: 
v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -# W64: v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null ; encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null ; encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -# W64: v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -# W64: v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -# W64: v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 ; encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -# W64: v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 ; encoding: 
[0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73 - -# W32: v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf - -# GFX11: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] -0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01 - -# GFX11: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] -0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04 - -# GFX11: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] -0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01 - -# GFX11: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07 - -# GFX11: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] -0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81 - -# GFX11: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] -0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] 
-0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1 - -# GFX11: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] -0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43 - -# GFX11: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] -0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1 - -# GFX11: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: 
[0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43 - -# GFX11: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] 
-0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_exp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_exp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, null ; encoding: [0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_exp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_floor_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00 - -# 
GFX11: v_floor_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, null ; encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_floor_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, vcc_hi ; encoding: 
[0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, null ; encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_floor_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_floor_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], vcc ; encoding: 
[0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_floor_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_fma_dx9_zero_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] 
-0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_fma_dx9_zero_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_fma_f16 v5, s105, 
s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43 - -# GFX11: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] 
-0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_fma_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_fma_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_fma_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_fma_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_fma_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_fma_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_fma_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_fma_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33 - -# 
GFX11: v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04 - -# GFX11: v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] -0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00 - -# GFX11: v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] -0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07 - -# GFX11: v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] -0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1 - -# GFX11: v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] -0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1 - -# GFX11: v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] -0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1 - -# GFX11: v_fma_f64 v[5:6], null, 0.5, vcc ; encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] -0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01 - -# GFX11: v_fma_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] -0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b - -# GFX11: v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] -0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73 - -# GFX11: v_fma_f64 
v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf - -# GFX11: v_fract_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, null ; encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_fract_f16_e64 v5, src_scc mul:4 ; encoding: 
[0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, null ; encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08 
- -# GFX11: v_fract_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_fract_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_fract_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: 
[0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_exp_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, src_scc ; 
encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 
v5, 0.5 ; encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_exp_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] 
-0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20 - -# GFX11: v_frexp_exp_i32_f64_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_mant_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, null ; encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] 
-0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_frexp_mant_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, null ; encoding: [0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] 
-0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_frexp_mant_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_mant_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_frexp_mant_f64_e64 
v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_ldexp_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_ldexp_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_ldexp_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_ldexp_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_ldexp_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_ldexp_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_ldexp_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: 
v_ldexp_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08] -0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08 - -# GFX11: v_ldexp_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10] -0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10 - -# GFX11: v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], v255 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00] -0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], s2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], s105 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[254:255], ttmp15 ; encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00] -0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], s[2:3], vcc_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], s[104:105], vcc_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], vcc, m0 ; encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], exec, exec_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], null, null ; encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00] 
-0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00] -0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00 - -# GFX11: v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2 ; encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08] -0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08 - -# GFX11: v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4 ; encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30] -0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30 - -# GFX11: v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_lerp_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_lerp_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_lerp_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_lerp_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_lerp_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_lerp_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_lerp_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_lerp_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_lerp_u8 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_lerp_u8 v5, 
exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_lerp_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_lerp_u8 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_lerp_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_lerp_u8 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_lerp_u8 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_log_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, exec_lo ; encoding: 
[0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, null ; encoding: [0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_log_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_log_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: 
v_log_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, null ; encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_log_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_log_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_lshl_add_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_lshl_add_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_lshl_add_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_lshl_add_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_lshl_add_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01] 
-0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_lshl_add_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_lshl_add_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_lshl_add_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_lshl_add_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_lshl_add_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_lshl_add_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_lshl_add_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_lshl_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_lshl_or_b32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_lshl_or_b32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_lshl_or_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_lshl_or_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04 - 
-# GFX11: v_lshl_or_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_lshl_or_b32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_lshl_or_b32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_lshl_or_b32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_lshl_or_b32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_lshl_or_b32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_lshl_or_b32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_lshl_or_b32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_lshlrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_lshlrev_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_lshlrev_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: 
v_lshlrev_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_lshlrev_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_lshlrev_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_lshlrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_lshlrev_b16 v5, m0, 0x3800 -0x05,0x00,0x38,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_lshlrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_lshlrev_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_lshlrev_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_lshlrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_lshlrev_b16 v5, 0x3800, m0 -0x05,0x00,0x38,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_lshlrev_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00 - -# GFX11: v_lshlrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_lshlrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] -0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00 - -# GFX11: v_lshlrev_b64 v[5:6], v255, exec ; encoding: [0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00] -0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00 - -# GFX11: v_lshlrev_b64 v[5:6], exec_lo, v[2:3] ; encoding: 
[0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00] -0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00 - -# GFX11: v_lshlrev_b64 v[5:6], exec_hi, v[254:255] ; encoding: [0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00] -0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00 - -# GFX11: v_lshlrev_b64 v[5:6], null, null ; encoding: [0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00] -0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00 - -# GFX11: v_lshlrev_b64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00] -0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00 - -# GFX11: v_lshlrev_b64 v[5:6], 0.5, 0xaf123456 ; encoding: [0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_lshlrev_b64 v[5:6], src_scc, src_scc ; encoding: [0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00] -0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00 - -# GFX11: v_lshlrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] -0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_lshrrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_lshrrev_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_lshrrev_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00] 
-0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_lshrrev_b16 v5, m0, 0x3800 -0x05,0x00,0x39,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_lshrrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_lshrrev_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, 0x3800, m0 -0x05,0x00,0x39,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_lshrrev_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00 - -# GFX11: v_lshrrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_lshrrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] -0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00 - -# GFX11: v_lshrrev_b64 v[5:6], v255, exec ; encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00] -0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00 - -# GFX11: v_lshrrev_b64 v[5:6], exec_lo, v[2:3] ; encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00] -0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00 - -# GFX11: v_lshrrev_b64 v[5:6], exec_hi, v[254:255] ; encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00] -0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00 - -# GFX11: v_lshrrev_b64 v[5:6], null, null ; encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00] -0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00 - -# GFX11: v_lshrrev_b64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00] -0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00 - -# GFX11: v_lshrrev_b64 v[5:6], 0.5, 0xaf123456 ; encoding: 
[0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_lshrrev_b64 v[5:6], src_scc, src_scc ; encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00] -0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00 - -# GFX11: v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 ; encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] -0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_mad_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_mad_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_mad_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_mad_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_mad_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_mad_i16 v5, m0, 0x3800, m0 -0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_mad_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_mad_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x53,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] 
-0x05,0x78,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_i32_i16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04 - -# GFX11: v_mad_i32_i16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00] -0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00 - -# GFX11: v_mad_i32_i16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07] -0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07 - -# GFX11: v_mad_i32_i16 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01] -0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01 - -# GFX11: v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo ; encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01] -0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01 - -# GFX11: v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_mad_i32_i16 v5, m0, 0x3800, m0 -0x05,0x00,0x5a,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_mad_i32_i16 v5, exec_lo, -1, exec_hi ; encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01] -0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01 - -# GFX11: v_mad_i32_i16 v5, exec_hi, 
null, exec_lo ; encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01] -0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01 - -# GFX11: v_mad_i32_i16 v5, null, exec_lo, null ; encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01] -0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01 - -# GFX11: v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456 ; encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_i32_i16 v5, 0x3800, m0, -1 -0x05,0x00,0x5a,0xd6,0xf0,0xfa,0x04,0x03 - -# GFX11: v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03] -0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03 - -# GFX11: v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] -0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_i32_i24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_mad_i32_i24 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_mad_i32_i24 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_mad_i32_i24 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_mad_i32_i24 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_mad_i32_i24 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_mad_i32_i24 v5, m0, 0.5, m0 ; 
encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_mad_i32_i24 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_mad_i32_i24 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_mad_i32_i24 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_i32_i24 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_mad_i32_i24 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_mad_i32_i24 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# W32: v_mad_i64_i32 v[5:6], s12, v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x18,0x00] -# W64: v_mad_i64_i32 v[5:6], s[12:13], v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x05,0x18,0x00] -0x05,0x0c,0xff,0xd6,0x01,0x05,0x18,0x00 - -# W32: v_mad_i64_i32 v[5:6], s12, v255, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0xff,0xf7,0x18,0x00] -# W64: v_mad_i64_i32 v[5:6], s[12:13], v255, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0xff,0xf7,0x18,0x00] -0x05,0x0c,0xff,0xd6,0xff,0xf7,0x18,0x00 - -# W32: v_mad_i64_i32 v[5:6], s12, s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x04,0x1a,0x00] -# W64: v_mad_i64_i32 v[5:6], s[12:13], s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x01,0x04,0x1a,0x00] -0x05,0x0c,0xff,0xd6,0x01,0x04,0x1a,0x00 - -# W32: 
v_mad_i64_i32 v[5:6], s12, s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00] -# W64: v_mad_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00] -0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00 - -# W32: v_mad_i64_i32 v[5:6], s12, vcc_lo, v255, s[104:105] ; encoding: [0x05,0x0c,0xff,0xd6,0x6a,0xfe,0xa3,0x01] -# W64: v_mad_i64_i32 v[5:6], s[12:13], vcc_lo, v255, s[104:105] ; encoding: [0x05,0x0c,0xff,0xd6,0x6a,0xfe,0xa3,0x01] -0x05,0x0c,0xff,0xd6,0x6a,0xfe,0xa3,0x01 - -# W32: v_mad_i64_i32 v[5:6], s12, vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] -# W64: v_mad_i64_i32 v[5:6], s[12:13], vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xff,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] -0x05,0x0c,0xff,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf - -# W32: v_mad_i64_i32 v[5:6], s12, ttmp15, src_scc, v[254:255] ; encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xfa,0xf9,0x07] -# W64: v_mad_i64_i32 v[5:6], s[12:13], ttmp15, src_scc, v[254:255] ; encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xfa,0xf9,0x07] -0x05,0x0c,0xff,0xd6,0x7b,0xfa,0xf9,0x07 - -# W32: v_mad_i64_i32 v[5:6], s12, m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01] -# W64: v_mad_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01] -0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01 - -# W32: v_mad_i64_i32 v[5:6], s12, exec_lo, -1, exec ; encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01] -# W64: v_mad_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec ; encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01] -0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01 - -# W32: v_mad_i64_i32 v[5:6], s12, exec_hi, null, vcc ; encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01] -# W64: v_mad_i64_i32 v[5:6], s[12:13], exec_hi, null, vcc ; encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01 - -# W32: v_mad_i64_i32 v[5:6], s12, null, exec_lo, 
null ; encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01] -# W64: v_mad_i64_i32 v[5:6], s[12:13], null, exec_lo, null ; encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01] -0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01 - -# W32: v_mad_i64_i32 v[5:6], s104, -1, exec_hi, -1 ; encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03] -# W64: v_mad_i64_i32 v[5:6], s[104:105], -1, exec_hi, -1 ; encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03] -0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03 - -# W32: v_mad_i64_i32 v[5:6], vcc_lo, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -# W64: v_mad_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf - -# W32: v_mad_i64_i32 v[5:6], ttmp14, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03] -# W64: v_mad_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc ; encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03] -0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03 - -# GFX11: v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] -0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_mad_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_mad_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_mad_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_mad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_mad_u16 
v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_mad_u16 v5, m0, 0x3800, m0 -0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_mad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_mad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x41,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -0x05,0x78,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_u32_u16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04 - -# GFX11: v_mad_u32_u16 v5, v255, v255, s3 ; encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00] -0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00 - -# GFX11: v_mad_u32_u16 v5, s1, s2, v255 ; encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07] -0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07 - -# GFX11: v_mad_u32_u16 v5, s105, s105, s105 ; encoding: 
[0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01] -0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01 - -# GFX11: v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo ; encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01] -0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01 - -# GFX11: v_mad_u32_u16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_mad_u32_u16 v5, m0, 0x3800, m0 -0x05,0x00,0x59,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_mad_u32_u16 v5, exec_lo, -1, exec_hi ; encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01] -0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01 - -# GFX11: v_mad_u32_u16 v5, exec_hi, null, exec_lo ; encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01] -0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01 - -# GFX11: v_mad_u32_u16 v5, null, exec_lo, null ; encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01] -0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01 - -# GFX11: v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456 ; encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_u32_u16 v5, 0x3800, m0, -1 -0x05,0x00,0x59,0xd6,0xf0,0xfa,0x04,0x03 - -# GFX11: v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03] -0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03 - -# GFX11: v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp ; encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] -0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mad_u32_u24 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_mad_u32_u24 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01] 
-0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_mad_u32_u24 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_mad_u32_u24 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_mad_u32_u24 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_mad_u32_u24 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_mad_u32_u24 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_mad_u32_u24 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_mad_u32_u24 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_mad_u32_u24 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_mad_u32_u24 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_mad_u32_u24 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 
-0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# W32: v_mad_u64_u32 v[5:6], s12, v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x18,0x00] -# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x18,0x00] -0x05,0x0c,0xfe,0xd6,0x01,0x05,0x18,0x00 - -# W32: v_mad_u64_u32 v[5:6], s12, v255, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0xff,0xf7,0x18,0x00] -# W64: v_mad_u64_u32 v[5:6], s[12:13], v255, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0xff,0xf7,0x18,0x00] -0x05,0x0c,0xfe,0xd6,0xff,0xf7,0x18,0x00 - -# W32: v_mad_u64_u32 v[5:6], s12, s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x04,0x1a,0x00] -# W64: v_mad_u64_u32 v[5:6], s[12:13], s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x04,0x1a,0x00] -0x05,0x0c,0xfe,0xd6,0x01,0x04,0x1a,0x00 - -# W32: v_mad_u64_u32 v[5:6], s12, s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00] -# W64: v_mad_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00] -0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00 - -# W32: v_mad_u64_u32 v[5:6], s12, vcc_lo, v255, s[104:105] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6a,0xfe,0xa3,0x01] -# W64: v_mad_u64_u32 v[5:6], s[12:13], vcc_lo, v255, s[104:105] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6a,0xfe,0xa3,0x01] -0x05,0x0c,0xfe,0xd6,0x6a,0xfe,0xa3,0x01 - -# W32: v_mad_u64_u32 v[5:6], s12, vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] -# W64: v_mad_u64_u32 v[5:6], s[12:13], vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] -0x05,0x0c,0xfe,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf - -# W32: v_mad_u64_u32 v[5:6], s12, ttmp15, src_scc, v[254:255] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xfa,0xf9,0x07] -# W64: v_mad_u64_u32 v[5:6], s[12:13], ttmp15, src_scc, v[254:255] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xfa,0xf9,0x07] 
-0x05,0x0c,0xfe,0xd6,0x7b,0xfa,0xf9,0x07 - -# W32: v_mad_u64_u32 v[5:6], s12, m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] -# W64: v_mad_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] -0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01 - -# W32: v_mad_u64_u32 v[5:6], s12, exec_lo, -1, exec ; encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01] -# W64: v_mad_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec ; encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01] -0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01 - -# W32: v_mad_u64_u32 v[5:6], s12, exec_hi, null, vcc ; encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] -# W64: v_mad_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc ; encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01 - -# W32: v_mad_u64_u32 v[5:6], s12, null, exec_lo, null ; encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] -# W64: v_mad_u64_u32 v[5:6], s[12:13], null, exec_lo, null ; encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] -0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01 - -# W32: v_mad_u64_u32 v[5:6], s104, -1, exec_hi, -1 ; encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03] -# W64: v_mad_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1 ; encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03] -0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03 - -# W32: v_mad_u64_u32 v[5:6], vcc_lo, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -# W64: v_mad_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf - -# W32: v_mad_u64_u32 v[5:6], ttmp14, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] -# W64: v_mad_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc ; encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] -0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03 - -# GFX11: v_mad_u64_u32 
v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] -0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_max3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_max3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_max3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_max3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_max3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_max3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x7c,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] 
-0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43 - -# GFX11: v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_max3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_max3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_max3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_max3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_max3_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_max3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_max3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_max3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1] 
-0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_max3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_max3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_max3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_max3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_max3_i16 v5, m0, 0x3800, m0 -0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: 
v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_max3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_max3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_max3_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_max3_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_max3_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_max3_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_max3_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: 
v_max3_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_max3_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_max3_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_max3_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_max3_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_max3_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_max3_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_max3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: 
[0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_max3_u16 v5, m0, 0x3800, m0 -0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_max3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_max3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_max3_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_max3_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01] 
-0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_max3_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_max3_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_max3_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_max3_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_max3_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_max3_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_max3_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_max3_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_max3_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_max3_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_max3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] 
-0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_max_f64 v[5:6], v[254:255], v[254:255] ; encoding: [0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00] -0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00 - -# GFX11: v_max_f64 v[5:6], s[2:3], s[4:5] ; encoding: [0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00] -0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00 - -# GFX11: v_max_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00] -0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00 - -# GFX11: v_max_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00] -0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00 - -# GFX11: v_max_f64 v[5:6], ttmp[14:15], 0xaf123456 ; encoding: [0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_max_f64 v[5:6], -|exec|, src_scc ; encoding: [0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20] -0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20 - -# GFX11: v_max_f64 v[5:6], null, 0.5 ; encoding: [0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00] -0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00 - -# GFX11: v_max_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00] -0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00 - -# GFX11: v_max_f64 v[5:6], 0.5, null mul:2 ; encoding: [0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08] -0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08 - -# GFX11: v_max_f64 v[5:6], -|src_scc|, -|exec| mul:4 ; encoding: [0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70] -0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70 - -# GFX11: v_max_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 ; encoding: [0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] -0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf - -# GFX11: v_max_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_max_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_max_i16 v5, s1, 
s2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_max_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_i16 v5, m0, 0x3800 -0x05,0x00,0x0a,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_i16 v5, 0x3800, m0 -0x05,0x00,0x0a,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_max_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00 - -# GFX11: v_max_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_max_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_max_u16 v5, s1, s2 ; encoding: 
[0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_max_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_u16 v5, m0, 0x3800 -0x05,0x00,0x09,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_u16 v5, 0x3800, m0 -0x05,0x00,0x09,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_max_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00 - -# GFX11: v_max_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_maxmin_f16 v5, s1, v255, exec_hi ; 
encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 - 
-# GFX11: v_maxmin_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_maxmin_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_maxmin_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_maxmin_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_maxmin_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_maxmin_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_maxmin_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_maxmin_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_maxmin_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_maxmin_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_maxmin_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_maxmin_f32 v5, 
-src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_maxmin_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_maxmin_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_maxmin_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_maxmin_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_maxmin_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_maxmin_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_maxmin_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_maxmin_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_maxmin_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# 
GFX11: v_maxmin_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_maxmin_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_maxmin_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_maxmin_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_maxmin_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_maxmin_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_maxmin_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_maxmin_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_maxmin_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_maxmin_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_maxmin_u32 v5, exec_hi, null, vcc_lo ; encoding: 
[0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_maxmin_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_maxmin_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_maxmin_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_maxmin_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_mad_u32_u24 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_maxmin_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_mad_u32_u24 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_mbcnt_hi_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mad_u32_u24 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_mbcnt_hi_u32_b32 v5, v255, v255 ; encoding: [0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_mad_u32_u24 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_mbcnt_hi_u32_b32 v5, s1, s2 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp ; encoding: 
[0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_mbcnt_hi_u32_b32 v5, s105, s105 ; encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x18,0x00] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v1, s2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x05,0x18,0x00] +0x05,0x0c,0xfe,0xd6,0x01,0x05,0x18,0x00 -# GFX11: v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, v255, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0xff,0xf7,0x18,0x00] +# W64: v_mad_u64_u32 v[5:6], s[12:13], v255, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0xff,0xf7,0x18,0x00] +0x05,0x0c,0xfe,0xd6,0xff,0xf7,0x18,0x00 -# GFX11: v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# W32: v_mad_u64_u32 v[5:6], s12, s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x04,0x1a,0x00] +# W64: v_mad_u64_u32 v[5:6], s[12:13], s1, v2, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0x04,0x1a,0x00] +0x05,0x0c,0xfe,0xd6,0x01,0x04,0x1a,0x00 -# GFX11: v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00] +# W64: v_mad_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00] +0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00 -# GFX11: v_mbcnt_hi_u32_b32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, vcc_lo, v255, s[104:105] ; encoding: 
[0x05,0x0c,0xfe,0xd6,0x6a,0xfe,0xa3,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], vcc_lo, v255, s[104:105] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6a,0xfe,0xa3,0x01] +0x05,0x0c,0xfe,0xd6,0x6a,0xfe,0xa3,0x01 -# GFX11: v_mbcnt_hi_u32_b32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] +# W64: v_mad_u64_u32 v[5:6], s[12:13], vcc_hi, 0xaf123456, v[3:4] ; encoding: [0x05,0x0c,0xfe,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] +0x05,0x0c,0xfe,0xd6,0x6b,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf -# GFX11: v_mbcnt_hi_u32_b32 v5, exec_hi, null ; encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, ttmp15, src_scc, v[254:255] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xfa,0xf9,0x07] +# W64: v_mad_u64_u32 v[5:6], s[12:13], ttmp15, src_scc, v[254:255] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xfa,0xf9,0x07] +0x05,0x0c,0xfe,0xd6,0x7b,0xfa,0xf9,0x07 -# GFX11: v_mbcnt_hi_u32_b32 v5, null, exec_lo ; encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] ; encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] +0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01 -# GFX11: v_mbcnt_hi_u32_b32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, exec_lo, -1, exec ; encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec ; encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01] +0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01 -# GFX11: v_mbcnt_hi_u32_b32 v5, 0.5, m0 ; encoding: 
[0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, exec_hi, null, vcc ; encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc ; encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00 +# W32: v_mad_u64_u32 v[5:6], s12, null, exec_lo, null ; encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] +# W64: v_mad_u64_u32 v[5:6], s[12:13], null, exec_lo, null ; encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] +0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01 -# GFX11: v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf +# W32: v_mad_u64_u32 v[5:6], s104, -1, exec_hi, -1 ; encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03] +# W64: v_mad_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1 ; encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03] +0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03 -# GFX11: v_mbcnt_lo_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00 +# W32: v_mad_u64_u32 v[5:6], vcc_lo, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +# W64: v_mad_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456 ; encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_mbcnt_lo_u32_b32 v5, v255, v255 ; encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00 +# W32: v_mad_u64_u32 v[5:6], ttmp14, src_scc, vcc_lo, src_scc ; encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] +# W64: v_mad_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc ; encoding: 
[0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] +0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03 -# GFX11: v_mbcnt_lo_u32_b32 v5, s1, s2 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp ; encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] +0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_mbcnt_lo_u32_b32 v5, s105, s105 ; encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_max3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_max3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_max3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_max3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mbcnt_lo_u32_b32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_max3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mbcnt_lo_u32_b32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00 +# 
GFX11: v_max3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_mbcnt_lo_u32_b32 v5, exec_hi, null ; encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_mbcnt_lo_u32_b32 v5, null, exec_lo ; encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_max3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mbcnt_lo_u32_b32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_max3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mbcnt_lo_u32_b32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x7c,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3] 
+0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_med3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_med3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_med3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_max3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: 
v_max3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max3_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1 +# GFX11: v_max3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x7c,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3 +# GFX11: v_max3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43 +# GFX11: v_max3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_med3_f16 v5, 
-src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23 +# GFX11: v_max3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_med3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_med3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_med3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_med3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04 +# 
GFX11: v_max3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_max3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: v_max3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1 +# GFX11: v_max3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_med3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf +# GFX11: v_max3_i16 v5, m0, 0x3800, m0 +0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: 
[0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3 +# GFX11: v_max3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b +# GFX11: v_max3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x4d,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_med3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33 +# GFX11: v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf +# GFX11: v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: 
[0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_max3_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_max3_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_i16 v5, m0, 0x3800, m0 -0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max3_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max3_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_max3_i32 v5, ttmp15, src_scc, 
ttmp15 ; encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_max3_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_max3_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_max3_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_med3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max3_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_med3_i32 v5, v255, s2, s105 ; encoding: 
[0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max3_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_med3_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_med3_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_max3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_max3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_max3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_i32 v5, 
exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_max3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_med3_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_max3_u16 v5, m0, 0x3800, m0 +0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_max3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_max3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x4e,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_med3_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; 
encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_max3_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_med3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_max3_u32 v5, s105, 
s105, exec_lo ; encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_med3_u16 v5, m0, 0x3800, m0 -0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max3_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max3_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_max3_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_max3_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_med3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_max3_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_max3_u32 v5, 
null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# GFX11: v_max3_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_med3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max3_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_med3_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max3_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_med3_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_med3_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_med3_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_max_f64 v[5:6], v[254:255], v[254:255] ; encoding: [0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00] +0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00 -# GFX11: v_med3_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: 
[0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_max_f64 v[5:6], s[2:3], s[4:5] ; encoding: [0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00 -# GFX11: v_med3_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_max_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00 -# GFX11: v_med3_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00] +0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00 -# GFX11: v_med3_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max_f64 v[5:6], ttmp[14:15], 0xaf123456 ; encoding: [0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_med3_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_max_f64 v[5:6], -|exec|, src_scc ; encoding: [0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20] +0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20 -# GFX11: v_med3_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_max_f64 v[5:6], null, 0.5 ; encoding: [0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00] +0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00 -# GFX11: v_med3_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_max_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00] 
+0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00 -# GFX11: v_med3_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_max_f64 v[5:6], 0.5, null mul:2 ; encoding: [0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08] +0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08 -# GFX11: v_med3_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_max_f64 v[5:6], -|src_scc|, -|exec| mul:4 ; encoding: [0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70] +0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70 -# GFX11: v_med3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_max_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 ; encoding: [0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] +0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf -# GFX11: v_min3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_min3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_min3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_min3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max_i16 v5, s105, s105 ; encoding: 
[0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_min3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_max_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_max_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: v_max_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_min3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max_i16 v5, m0, 0x3800 +0x05,0x00,0x0a,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_min3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1 +# GFX11: v_max_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x7c,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 +# GFX11: v_max_i16 v5, null, 
exec_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3 +# GFX11: v_max_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43 +# GFX11: v_max_i16 v5, 0x3800, m0 +0x05,0x00,0x0a,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23 +# GFX11: v_max_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# GFX11: v_max_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_max_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_min3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_max_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_min3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_max_u16 v5, s1, s2 
; encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_min3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_max_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_min3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_max_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_min3_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_max_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: v_max_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_min3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_max_u16 v5, m0, 0x3800 +0x05,0x00,0x09,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_min3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_max_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_min3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1 +# GFX11: v_max_u16 v5, exec_hi, null ; encoding: 
[0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_min3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf +# GFX11: v_max_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_min3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3 +# GFX11: v_max_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_min3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b +# GFX11: v_max_u16 v5, 0x3800, m0 +0x05,0x00,0x09,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_min3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33 +# GFX11: v_max_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf +# GFX11: v_max_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_maxmin_f16 v5, v255, s2, 
s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_min3_i16 v5, m0, 0x3800, m0 -0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_maxmin_f16 v5, |exec_lo|, -1, 
vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# GFX11: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_min3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# GFX11: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] 
+0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 -# GFX11: v_min3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_maxmin_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min3_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_maxmin_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min3_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_maxmin_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min3_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_maxmin_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min3_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_maxmin_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min3_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_maxmin_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_min3_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_maxmin_f32 v5, -|ttmp15|, 
-|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_min3_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_maxmin_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min3_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_maxmin_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min3_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_maxmin_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_min3_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_maxmin_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_min3_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_maxmin_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_min3_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_maxmin_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_min3_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03] 
-0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_maxmin_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_min3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_maxmin_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_maxmin_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_maxmin_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_maxmin_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_maxmin_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_maxmin_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min3_u16 v5, vcc_hi, 
0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_maxmin_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_min3_u16 v5, m0, 0x3800, m0 -0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_maxmin_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_maxmin_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_maxmin_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] -0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 +# GFX11: v_maxmin_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_maxmin_i32 v5, -1, exec_hi, 
src_scc ; encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_min3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] -0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_maxmin_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_maxmin_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 +# GFX11: v_maxmin_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_min3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_maxmin_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min3_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_maxmin_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min3_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_maxmin_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min3_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: 
v_maxmin_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min3_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_maxmin_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min3_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_min3_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_maxmin_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_min3_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_maxmin_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min3_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_maxmin_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min3_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_maxmin_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_min3_u32 v5, null, exec_lo, 0xaf123456 ; encoding: 
[0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_maxmin_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_min3_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_maxmin_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_min3_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_maxmin_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_min3_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_maxmin_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_min3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_maxmin_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_min_f64 v[5:6], v[254:255], v[254:255] ; encoding: [0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00] -0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, v255, v255 ; encoding: 
[0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_min_f64 v[5:6], s[2:3], s[4:5] ; encoding: [0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00] -0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, s1, s2 ; encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_min_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00] -0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, s105, s105 ; encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_min_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00] -0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_min_f64 v[5:6], ttmp[14:15], 0xaf123456 ; encoding: [0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_min_f64 v[5:6], -|exec|, src_scc ; encoding: [0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20] -0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20 +# GFX11: v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_min_f64 v[5:6], null, 0.5 ; encoding: [0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00] -0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_min_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00] -0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00 +# GFX11: 
v_mbcnt_hi_u32_b32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_min_f64 v[5:6], 0.5, null mul:2 ; encoding: [0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08] -0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08 +# GFX11: v_mbcnt_hi_u32_b32 v5, exec_hi, null ; encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_min_f64 v[5:6], -|src_scc|, -|exec| mul:4 ; encoding: [0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70] -0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70 +# GFX11: v_mbcnt_hi_u32_b32 v5, null, exec_lo ; encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_min_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 ; encoding: [0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] -0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_mbcnt_hi_u32_b32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_min_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_min_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_min_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_min_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00] 
-0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_min_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, v255, v255 ; encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_min_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, s1, s2 ; encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_min_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, s105, s105 ; encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_min_i16 v5, m0, 0x3800 -0x05,0x00,0x0c,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_min_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_min_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_min_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: 
v_mbcnt_lo_u32_b32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_min_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_min_i16 v5, 0x3800, m0 -0x05,0x00,0x0c,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, exec_hi, null ; encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_min_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, null, exec_lo ; encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_min_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_min_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_min_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_min_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
+0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_min_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_med3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_min_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_med3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_min_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_med3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_min_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_med3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_min_u16 v5, m0, 0x3800 -0x05,0x00,0x0b,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_med3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_min_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_med3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_min_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1 -# 
GFX11: v_min_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_med3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_min_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_med3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_min_u16 v5, 0x3800, m0 -0x05,0x00,0x0b,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_min_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x7c,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_min_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23] 
+0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_med3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_med3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# GFX11: v_med3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: v_med3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_med3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_med3_f32 v5, 
vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1 +# GFX11: v_med3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 +# GFX11: v_med3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3 +# GFX11: v_med3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b +# GFX11: v_med3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33 +# GFX11: v_med3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 +# GFX11: v_med3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: 
[0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_minmax_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_med3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_minmax_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_med3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_minmax_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_minmax_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_med3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_minmax_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_med3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_minmax_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_med3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_minmax_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1] 
-0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: v_med3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_minmax_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_med3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_minmax_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_med3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1 +# GFX11: v_med3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_minmax_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf +# GFX11: v_med3_i16 v5, m0, 0x3800, m0 +0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_minmax_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3 +# GFX11: v_med3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_minmax_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b +# GFX11: v_med3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x50,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_minmax_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: 
[0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33 +# GFX11: v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf +# GFX11: v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_minmax_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_med3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_minmax_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_minmax_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_med3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_minmax_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_med3_i32 v5, v255, s2, s105 ; encoding: 
[0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_minmax_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_med3_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_minmax_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_med3_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_minmax_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_med3_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_minmax_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_med3_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_minmax_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_med3_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_minmax_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_med3_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_minmax_i32 v5, -1, exec_hi, src_scc ; encoding: 
[0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_med3_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_minmax_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_med3_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_minmax_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_med3_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_minmax_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_med3_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_minmax_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_med3_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_minmax_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_med3_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_minmax_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_med3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 
+0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_minmax_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_med3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_minmax_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_med3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_minmax_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_med3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_minmax_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_med3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_minmax_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_med3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_minmax_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_med3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_med3_u16 v5, ttmp15, 
src_scc, ttmp15 ; encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_minmax_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_med3_u16 v5, m0, 0x3800, m0 +0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_minmax_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_med3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_minmax_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_med3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x51,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_minmax_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_mov_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_med3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_mov_b32_e64 v5, v255 ; encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00 +# GFX11: 
v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_mov_b32_e64 v5, s1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_mov_b32_e64 v5, s105 ; encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mov_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mov_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mov_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mov_b32_e64 v5, m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mov_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: 
[0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_mov_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_mov_b32_e64 v5, null ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mov_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mov_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_mov_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_med3_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_med3_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_med3_u32 v5, 0.5, m0, 0.5 ; 
encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_movreld_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_med3_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_movreld_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_med3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_movreld_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_movreld_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_movreld_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_movreld_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_movreld_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04] 
+0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_movreld_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_movreld_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_movreld_b32_e64 v5, null ; encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_movreld_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_movreld_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x49,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_movreld_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x7c,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_min3_f16 
v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] -0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] -0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_min3_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_min3_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] -0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_min3_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] 
-0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01 +# GFX11: v_min3_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01] -0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01 +# GFX11: v_min3_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01] -0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01 +# GFX11: v_min3_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01] -0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01 +# GFX11: v_min3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] ; encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00] -0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00 +# GFX11: v_min3_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04] -0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04 +# GFX11: v_min3_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] ; encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01] -0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01 +# GFX11: v_min3_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1] 
+0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] ; encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07] -0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07 +# GFX11: v_min3_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null ; encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01] -0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01 +# GFX11: v_min3_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec ; encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01] -0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01 +# GFX11: v_min3_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_mqsad_pk_u16_u8 v[5:6], null, null, vcc ; encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01] -0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01 +# GFX11: v_min3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc ; encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03] -0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03 +# GFX11: v_min3_i16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mqsad_pk_u16_u8 v[5:6], src_scc, 
src_scc, 0.5 ; encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03] -0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03 +# GFX11: v_min3_i16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp ; encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_min3_i16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07] -0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07 +# GFX11: v_min3_i16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07] -0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07 +# GFX11: v_min3_i16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07 +# GFX11: v_min3_i16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07 +# GFX11: v_min3_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07 +# GFX11: v_min3_i16 v5, m0, 0x3800, m0 
+0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07 +# GFX11: v_min3_i16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07 +# GFX11: v_min3_i16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x4a,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07 +# GFX11: v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07 +# GFX11: v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07 +# GFX11: v_min3_i16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_mqsad_u32_u8 v[5:8], null, null, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07] -0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07 +# GFX11: v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07] 
-0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07 +# GFX11: v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07] -0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07 +# GFX11: v_min3_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07] -0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07 +# GFX11: v_min3_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp ; encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] -0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf +# GFX11: v_min3_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_msad_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_min3_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_msad_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_min3_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_msad_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_min3_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] 
+0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_msad_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_min3_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_msad_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_min3_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_msad_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_min3_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_msad_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_min3_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_msad_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_min3_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_msad_u8 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_min3_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_msad_u8 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_min3_i32 v5, 0.5, m0, 0.5 
; encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_msad_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_min3_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_msad_u8 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_min3_i32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_msad_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_min3_u16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_msad_u8 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_min3_u16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_min3_u16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_min3_u16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mul_f64 v[5:6], v[254:255], v[254:255] ; encoding: [0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00] 
-0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00 +# GFX11: v_min3_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mul_f64 v[5:6], s[2:3], s[4:5] ; encoding: [0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00] -0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00 +# GFX11: v_min3_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_mul_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00] -0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00 +# GFX11: v_min3_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_mul_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00] -0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00 +# GFX11: v_min3_u16 v5, m0, 0x3800, m0 +0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mul_f64 v[5:6], ttmp[14:15], 0xaf123456 ; encoding: [0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_min3_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mul_f64 v[5:6], -|exec|, src_scc ; encoding: [0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20] -0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20 +# GFX11: v_min3_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x4b,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_mul_f64 v[5:6], null, 0.5 ; encoding: [0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00] -0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00 +# GFX11: v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] +0x05,0x78,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00 -# GFX11: v_mul_f64 v[5:6], -1, -1 ; encoding: 
[0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00] -0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00 +# GFX11: v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_mul_f64 v[5:6], 0.5, null mul:2 ; encoding: [0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08] -0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08 +# GFX11: v_min3_u16 v5, 0x3800, m0, 0x3800 op_sel:[0,1,0,0] +0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_mul_f64 v[5:6], -|src_scc|, -|exec| mul:4 ; encoding: [0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70] -0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70 +# GFX11: v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_mul_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 ; encoding: [0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] -0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] ; encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_mul_hi_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_min3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mul_hi_i32 v5, v255, v255 ; encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_min3_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mul_hi_i32 v5, s1, s2 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_min3_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: 
v_mul_hi_i32 v5, s105, s105 ; encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_min3_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mul_hi_i32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_min3_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mul_hi_i32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_min3_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_i32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_min3_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_mul_hi_i32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_min3_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mul_hi_i32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_min3_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mul_hi_i32 v5, exec_hi, null ; encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_min3_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01] 
+0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_mul_hi_i32 v5, null, exec_lo ; encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_min3_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_i32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_min3_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_mul_hi_i32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_min3_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_mul_hi_i32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_min3_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_mul_hi_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_min3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_mul_hi_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_min_f64 v[5:6], v[254:255], v[254:255] 
; encoding: [0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00] +0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00 -# GFX11: v_mul_hi_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_min_f64 v[5:6], s[2:3], s[4:5] ; encoding: [0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00] +0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00 -# GFX11: v_mul_hi_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_min_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00] +0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00 -# GFX11: v_mul_hi_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_min_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00] +0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00 -# GFX11: v_mul_hi_u32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_min_f64 v[5:6], ttmp[14:15], 0xaf123456 ; encoding: [0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_min_f64 v[5:6], -|exec|, src_scc ; encoding: [0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20] +0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20 -# GFX11: v_mul_hi_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_min_f64 v[5:6], null, 0.5 ; encoding: [0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00] +0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00 -# GFX11: v_mul_hi_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_min_f64 v[5:6], -1, -1 ; 
encoding: [0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00] +0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00 -# GFX11: v_mul_hi_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_min_f64 v[5:6], 0.5, null mul:2 ; encoding: [0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08] +0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08 -# GFX11: v_mul_hi_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_min_f64 v[5:6], -|src_scc|, -|exec| mul:4 ; encoding: [0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70] +0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70 -# GFX11: v_mul_hi_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_min_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 ; encoding: [0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] +0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_u32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_min_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_mul_hi_u32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_min_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_mul_hi_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_min_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_min_i16 v5, s105, s105 ; encoding: 
[0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_min_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_min_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_min_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_mul_lo_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_min_i16 v5, m0, 0x3800 +0x05,0x00,0x0c,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_mul_lo_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_min_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_mul_lo_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_min_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, m0, 0x3800 -0x05,0x00,0x05,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_min_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_min_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_min_i16 v5, 0x3800, m0 +0x05,0x00,0x0c,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_min_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_min_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_mul_lo_u16 v5, 0x3800, m0 -0x05,0x00,0x05,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_min_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_mul_lo_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_min_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_mul_lo_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_min_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_min_u16 v5, s105, s105 ; encoding: 
[0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_min_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_min_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_min_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_mul_lo_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_min_u16 v5, m0, 0x3800 +0x05,0x00,0x0b,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_mul_lo_u32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_min_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_mul_lo_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_min_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_min_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: 
v_mul_lo_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_min_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_min_u16 v5, 0x3800, m0 +0x05,0x00,0x0b,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_min_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_min_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_mul_lo_u32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mul_lo_u32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mul_lo_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mullit_f32 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_mullit_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_mullit_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_mullit_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_mullit_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1 +# GFX11: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] 
+0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_mullit_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_mullit_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1 +# GFX11: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_mullit_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf +# GFX11: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3 +# GFX11: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 -# GFX11: v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b +# GFX11: v_minmax_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: 
[0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33 +# GFX11: v_minmax_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf +# GFX11: v_minmax_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_nop ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] -0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_not_b16_e64 v5, v1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_minmax_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_not_b16_e64 v5, v255 ; encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_minmax_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_not_b16_e64 v5, s1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_not_b16_e64 v5, s105 ; encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_not_b16_e64 v5, vcc_lo ; encoding: 
[0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_not_b16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_not_b16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_not_b16_e64 v5, m0 ; encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_not_b16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_not_b16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_not_b16_e64 v5, null ; encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_minmax_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# 
GFX11: v_not_b16_e64 v5, -1 ; encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_not_b16_e64 v5, 0x3800 -0x05,0x00,0xe9,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_not_b16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_not_b16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_minmax_i32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_not_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_minmax_i32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_not_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_minmax_i32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_not_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_not_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] 
-0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_not_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_not_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_not_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_not_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_not_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_not_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_minmax_i32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_not_b32_e64 v5, null ; encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_minmax_i32 
v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_not_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_minmax_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_not_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_minmax_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_not_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_minmax_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_not_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_minmax_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_or3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_minmax_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_or3_b32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_minmax_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_or3_b32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01] 
-0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_minmax_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_or3_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_minmax_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_or3_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_minmax_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_or3_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_minmax_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_or3_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_minmax_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_or3_b32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_minmax_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_or3_b32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_minmax_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_or3_b32 v5, exec_hi, null, vcc_lo ; 
encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_minmax_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_or3_b32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_minmax_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_or3_b32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] +0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01 -# GFX11: v_or3_b32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01] +0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01 -# GFX11: v_or3_b32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01] +0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01 -# GFX11: v_or3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01] +0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01 -# GFX11: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], 
v[254:255], ttmp15, s[6:7] ; encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00] +0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00 -# GFX11: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04] +0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04 -# GFX11: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] ; encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01] +0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01 -# GFX11: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] ; encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07] +0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07 -# GFX11: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null ; encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01] +0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01 -# GFX11: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec ; encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01] +0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01 -# GFX11: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], null, null, vcc ; encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01] +0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01 -# GFX11: v_or_b16 v5, m0, 0x3800 -0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 ; 
encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc ; encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03] +0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03 -# GFX11: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 ; encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03] +0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03 -# GFX11: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp ; encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07] +0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07 -# GFX11: v_or_b16 v5, 0x3800, m0 -0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07] +0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07 -# GFX11: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07 -# GFX11: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 
-0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07 -# GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07 -# GFX11: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07 -# GFX11: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07 -# GFX11: v_pack_b32_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07 -# GFX11: v_pack_b32_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07 -# GFX11: v_pack_b32_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07 
-# GFX11: v_pack_b32_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], null, null, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07] +0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07 -# GFX11: v_pack_b32_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07] +0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07 -# GFX11: v_pack_b32_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07] +0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07 -# GFX11: v_pack_b32_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255] ; encoding: [0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07] +0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07 -# GFX11: v_pack_b32_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp ; encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] +0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf -# GFX11: v_pack_b32_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_msad_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_pack_b32_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_msad_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01] 
+0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: v_msad_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 +# GFX11: v_msad_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00 +# GFX11: v_msad_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_perm_b32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01 +# GFX11: v_msad_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_perm_b32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01 +# GFX11: v_msad_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_perm_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01 +# GFX11: v_msad_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_perm_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04 +# GFX11: v_msad_u8 v5, exec_lo, -1, vcc_hi 
; encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_perm_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_msad_u8 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_perm_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01 +# GFX11: v_msad_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_perm_b32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01 +# GFX11: v_msad_u8 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_perm_b32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01 +# GFX11: v_msad_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_perm_b32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX11: v_msad_u8 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_perm_b32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_perm_b32 v5, -1, exec_hi, src_scc 
; encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03 +# GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_perm_b32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_mul_f64 v[5:6], v[254:255], v[254:255] ; encoding: [0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00] +0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00 -# GFX11: v_perm_b32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03 +# GFX11: v_mul_f64 v[5:6], s[2:3], s[4:5] ; encoding: [0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00] +0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00 -# GFX11: v_perm_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_mul_f64 v[5:6], s[104:105], s[104:105] ; encoding: [0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00] +0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00 -# GFX11: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] -0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00 +# GFX11: v_mul_f64 v[5:6], vcc, ttmp[14:15] ; encoding: [0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00] +0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00 -# GFX11: v_permlane16_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] -0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01 +# GFX11: v_mul_f64 v[5:6], ttmp[14:15], 0xaf123456 ; encoding: [0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_permlane16_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] -0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01 +# GFX11: v_mul_f64 v[5:6], -|exec|, src_scc ; encoding: [0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20] 
+0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20 -# GFX11: v_permlane16_b32 v5, v1, vcc_hi, exec_hi ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xfc,0x01] -0x05,0x00,0x5b,0xd6,0x01,0xd7,0xfc,0x01 +# GFX11: v_mul_f64 v[5:6], null, 0.5 ; encoding: [0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00] +0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00 -# GFX11: v_permlane16_b32 v5, v1, vcc_lo, exec_lo ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf8,0x01] -0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf8,0x01 +# GFX11: v_mul_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00] +0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00 -# GFX11: v_permlane16_b32 v5, v1, m0, m0 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xf4,0x01] -0x05,0x00,0x5b,0xd6,0x01,0xfb,0xf4,0x01 +# GFX11: v_mul_f64 v[5:6], 0.5, null mul:2 ; encoding: [0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08] +0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08 -# GFX11: v_permlane16_b32 v5, v1, exec_hi, vcc_hi ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xac,0x01] -0x05,0x00,0x5b,0xd6,0x01,0xff,0xac,0x01 +# GFX11: v_mul_f64 v[5:6], -|src_scc|, -|exec| mul:4 ; encoding: [0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70] +0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70 -# GFX11: v_permlane16_b32 v5, v1, exec_lo, vcc_lo ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xa8,0x01] -0x05,0x00,0x5b,0xd6,0x01,0xfd,0xa8,0x01 +# GFX11: v_mul_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 ; encoding: [0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] +0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf -# GFX11: v_permlane16_b32 v5, v1, null, 0xaf123456 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x5b,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_permlane16_b32 v5, v1, -1, src_scc ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0xf5,0x03] -0x05,0x00,0x5b,0xd6,0x01,0x83,0xf5,0x03 +# GFX11: v_mul_hi_i32 v5, 
v255, v255 ; encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_permlane16_b32 v5, v1, 0.5, 0.5 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xe1,0xc1,0x03] -0x05,0x00,0x5b,0xd6,0x01,0xe1,0xc1,0x03 +# GFX11: v_mul_hi_i32 v5, s1, s2 ; encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_permlane16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03] -0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03 +# GFX11: v_mul_hi_i32 v5, s105, s105 ; encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_permlane16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_i32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] -0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00 +# GFX11: v_mul_hi_i32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_permlanex16_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01 +# GFX11: v_mul_hi_i32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_permlanex16_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01 +# GFX11: v_mul_hi_i32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_permlanex16_b32 v5, v1, vcc_hi, exec_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01 
+# GFX11: v_mul_hi_i32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_permlanex16_b32 v5, v1, vcc_lo, exec_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01 +# GFX11: v_mul_hi_i32 v5, exec_hi, null ; encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_permlanex16_b32 v5, v1, m0, m0 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01 +# GFX11: v_mul_hi_i32 v5, null, exec_lo ; encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_permlanex16_b32 v5, v1, exec_hi, vcc_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01 +# GFX11: v_mul_hi_i32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_permlanex16_b32 v5, v1, exec_lo, vcc_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01 +# GFX11: v_mul_hi_i32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_permlanex16_b32 v5, v1, null, 0xaf123456 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_i32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_permlanex16_b32 v5, v1, -1, src_scc ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03] -0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03 +# GFX11: v_mul_hi_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_permlanex16_b32 v5, v1, 0.5, 0.5 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03] 
-0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03 +# GFX11: v_mul_hi_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_permlanex16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03] -0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03 +# GFX11: v_mul_hi_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_permlanex16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_pipeflush ; encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] -0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00 +# GFX11: v_mul_hi_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] -0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01 +# GFX11: v_mul_hi_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] -0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01 +# GFX11: v_mul_hi_u32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] -0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01 +# GFX11: v_mul_hi_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] ; 
encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] -0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01 +# GFX11: v_mul_hi_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] ; encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] -0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00 +# GFX11: v_mul_hi_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] -0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04 +# GFX11: v_mul_hi_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] ; encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] -0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01 +# GFX11: v_mul_hi_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] ; encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] -0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07 +# GFX11: v_mul_hi_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null ; encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] -0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01 +# GFX11: v_mul_hi_u32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec ; encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] -0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01 +# GFX11: v_mul_hi_u32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], null, 
null, vcc ; encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] -0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01 +# GFX11: v_mul_hi_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc ; encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] -0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03 +# GFX11: v_mul_lo_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 ; encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] -0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03 +# GFX11: v_mul_lo_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp ; encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf +# GFX11: v_mul_lo_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_lo_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, v255 ; encoding: 
[0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_rcp_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, m0, 0x3800 +0x05,0x00,0x05,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_rcp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_rcp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, 0x3800, m0 +0x05,0x00,0x05,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, exec_hi ; 
encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_mul_lo_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_rcp_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_rcp_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_mul_lo_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_rcp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_mul_lo_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_rcp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_f32_e64 v5, v255 ; encoding: 
[0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_rcp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_rcp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_rcp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_rcp_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_rcp_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_rcp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_rcp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v5, src_scc, vcc_lo ; encoding: 
[0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_rcp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_mul_lo_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_f32_e64 v5, null ; encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_rcp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_rcp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_mullit_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_rcp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_mullit_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_mullit_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_rcp_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 ; 
encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_rcp_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_rcp_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_rcp_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_rcp_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_rcp_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00 
+# GFX11: v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_rcp_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_rcp_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30 +# GFX11: v_or3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_rcp_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_or3_b32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_rcp_iflag_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_or3_b32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_rcp_iflag_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_or3_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_rcp_iflag_f32_e64 v5, s1 ; encoding: 
[0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_rcp_iflag_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_iflag_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_rcp_iflag_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_rcp_iflag_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_rcp_iflag_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_rcp_iflag_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_iflag_f32_e64 v5, exec_hi ; 
encoding: [0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_rcp_iflag_f32_e64 v5, null ; encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_rcp_iflag_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_or3_b32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_rcp_iflag_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_or3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_rcp_iflag_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_readlane_b32 s5, v1, s2 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] -0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00 +# GFX11: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_readlane_b32 s5, v1, s105 ; encoding: 
[0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] -0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00 +# GFX11: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_readlane_b32 s105, v1, ttmp15 ; encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] -0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00 +# GFX11: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_readlane_b32 vcc_lo, v1, vcc_hi ; encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] -0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00 +# GFX11: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_readlane_b32 vcc_hi, v1, vcc_lo ; encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] -0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00 +# GFX11: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_readlane_b32 ttmp15, v1, m0 ; encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] -0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00 +# GFX11: v_or_b16 v5, m0, 0x3800 +0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_readlane_b32 null, v255, null ; encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] -0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00 +# GFX11: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_rndne_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_or_b16 v5, null, exec_lo ; encoding: 
[0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_or_b16 v5, 0x3800, m0 +0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_rndne_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_rndne_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, exec_hi ; encoding: 
[0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, null ; encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_rndne_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_pack_b32_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_rndne_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_pack_b32_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_rndne_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_rndne_f32_e64 v5, v255 ; encoding: 
[0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_rndne_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_rndne_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_rndne_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_rndne_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 -# GFX11: v_rndne_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_rndne_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_rndne_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] 
-0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_rndne_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_rndne_f32_e64 v5, null ; encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_rndne_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_rndne_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_perm_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_rndne_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_perm_b32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_perm_b32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_rndne_f64_e64 v[5:6], v[1:2] ; encoding: 
[0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_perm_b32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_rndne_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_perm_b32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rndne_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_rndne_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_rndne_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_perm_b32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_rndne_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_perm_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_rndne_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] +0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00 -# GFX11: v_rndne_f64_e64 
v[5:6], null ; encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] +0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01 -# GFX11: v_rndne_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] +0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01 -# GFX11: v_rndne_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_permlane16_b32 v5, v1, vcc_hi, exec_hi ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xfc,0x01] +0x05,0x00,0x5b,0xd6,0x01,0xd7,0xfc,0x01 -# GFX11: v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30 +# GFX11: v_permlane16_b32 v5, v1, vcc_lo, exec_lo ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf8,0x01] +0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf8,0x01 -# GFX11: v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_permlane16_b32 v5, v1, m0, m0 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xf4,0x01] +0x05,0x00,0x5b,0xd6,0x01,0xfb,0xf4,0x01 -# GFX11: v_rsq_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, exec_hi, vcc_hi ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xac,0x01] +0x05,0x00,0x5b,0xd6,0x01,0xff,0xac,0x01 -# GFX11: v_rsq_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, exec_lo, vcc_lo ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xa8,0x01] +0x05,0x00,0x5b,0xd6,0x01,0xfd,0xa8,0x01 -# GFX11: 
v_rsq_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, null, 0xaf123456 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x5b,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, -1, src_scc ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0xf5,0x03] +0x05,0x00,0x5b,0xd6,0x01,0x83,0xf5,0x03 -# GFX11: v_rsq_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, 0.5, 0.5 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xe1,0xc1,0x03] +0x05,0x00,0x5b,0xd6,0x01,0xe1,0xc1,0x03 -# GFX11: v_rsq_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03] +0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03 -# GFX11: v_rsq_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00 -# GFX11: v_rsq_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01 -# GFX11: v_rsq_f16_e64 v5, 
exec_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01 -# GFX11: v_rsq_f16_e64 v5, null ; encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, vcc_hi, exec_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01 -# GFX11: v_rsq_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, vcc_lo, exec_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01 -# GFX11: v_rsq_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_permlanex16_b32 v5, v1, m0, m0 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01 -# GFX11: v_rsq_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_permlanex16_b32 v5, v1, exec_hi, vcc_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01 -# GFX11: v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, exec_lo, vcc_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01 -# GFX11: v_rsq_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, null, 0xaf123456 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf 
-# GFX11: v_rsq_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, -1, src_scc ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03] +0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03 -# GFX11: v_rsq_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, 0.5, 0.5 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03] +0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03 -# GFX11: v_rsq_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03] +0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03 -# GFX11: v_rsq_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] +0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01 -# GFX11: v_rsq_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] +0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01 -# GFX11: v_rsq_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] +0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01 -# GFX11: 
v_rsq_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] +0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01 -# GFX11: v_rsq_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] ; encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] +0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00 -# GFX11: v_rsq_f32_e64 v5, null ; encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04 -# GFX11: v_rsq_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] ; encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] +0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01 -# GFX11: v_rsq_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] ; encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] +0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07 -# GFX11: v_rsq_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null ; encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] +0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01 -# GFX11: v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec ; encoding: 
[0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] +0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01 -# GFX11: v_rsq_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], null, null, vcc ; encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] +0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01 -# GFX11: v_rsq_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc ; encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] +0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03 -# GFX11: v_rsq_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 ; encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] +0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03 -# GFX11: v_rsq_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp ; encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_readlane_b32 s5, v1, s2 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] 
-0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_readlane_b32 s5, v1, s105 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_readlane_b32 s105, v1, ttmp15 ; encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] +0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_readlane_b32 vcc_lo, v1, vcc_hi ; encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] +0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_readlane_b32 vcc_hi, v1, vcc_lo ; encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] +0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30 +# GFX11: v_readlane_b32 ttmp15, v1, m0 ; encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] +0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_readlane_b32 null, v255, null ; encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] +0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00 # GFX11: v_sad_hi_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00 @@ -8065,267 +5170,6 @@ # GFX11: v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: 
[0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] 
-0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, null ; encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sin_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: 
[0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, null ; encoding: [0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sin_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_sin_f32_e64 v255, 
-|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_sqrt_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, null ; encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sqrt_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] 
-0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, null ; encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sqrt_f32_e64 v5, src_scc mul:4 ; encoding: 
[0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_sqrt_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sqrt_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - # 
W32: v_sub_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] # W64: v_sub_co_u32 v5, s[12:13], v1, v2 ; encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] 0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00 @@ -8624,132 +5468,6 @@ # GFX11: v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] 0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf -# GFX11: v_trunc_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, null ; encoding: [0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] 
-0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_trunc_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, null ; encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, 
-1 ; encoding: [0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_trunc_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_trunc_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_trunc_f64_e64 v[5:6], 
-|src_scc| mul:4 ; encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - # GFX11: v_writelane_b32 v5, s1, s2 ; encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 65631e3b90929..3dd7727a3dabd 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -602,216 +602,6 @@ # GFX11: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 
-0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: 
v_ceil_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, 
v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; 
encoding: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
-0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cls_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 
-0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: 
v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - # W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] # W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff @@ -873,132 +663,6 @@ # GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 -# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 
row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
-0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 
-0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 
-0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -1167,1643 +831,341 @@ # GFX11: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] 0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
-0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 
-0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 
+0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 
-0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 
+0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
-0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 
+0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: 
v_cvt_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
-0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, 
v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: 
v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: 
v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, 
v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: 
v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] 
-0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30] -0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: 
v_cvt_pknorm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, 
v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
-0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 
-0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 
-0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: 
v_exp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, 
v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 
fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 
row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fract_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_fract_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 
row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30] +0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30 -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
+0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
-0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 
+0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: 
v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 # GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -2889,90 +1251,6 @@ # GFX11: v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
-0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_log_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 
-0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_log_f32_e64_dpp v255, 
-|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - # GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -4143,263 +2421,53 @@ # GFX11: v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] 0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
-0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 # GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -4527,90 +2595,6 @@ # GFX11: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] 0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# 
GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_not_b16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_not_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_not_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 
-0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -4737,300 +2721,6 @@ # GFX11: v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 
-0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, 
v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, 
v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
-0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 
-0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 
-0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - # GFX11: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -5199,216 +2889,6 @@ # GFX11: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
-0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 
-0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: 
v_sqrt_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, 
v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - # W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -5561,90 +3041,6 @@ # GFX11: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
-0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - # GFX11: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt new file mode 100644 index 0000000000000..19a5c36118337 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt @@ -0,0 +1,2605 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
+0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: 
v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 
+0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
+0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cls_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 
+0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# 
GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
+0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
+0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + 
+# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 
+0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] 
+0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 
+0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, 
v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 
row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 
+0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
+0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
+0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: 
v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, 
v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fract_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp 
v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 
+0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
+0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# 
GFX11: v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: 
v_movrels_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: 
v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
+0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff 
+ +# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_not_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_not_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
+0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
+0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
+0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 
+0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rsq_f16_e64_dpp v255, 
-|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 
+0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
+0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 
+0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, 
v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
+0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 48eccc0b1fcc0..3f4f44a479fd5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -332,48 +332,6 @@ # GFX11: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_clz_i32_u32_e64_dpp 
v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] # W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 @@ -399,36 +357,6 @@ # GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] 0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 -# GFX11: v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f32_e64_dpp 
v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -573,180 +501,6 @@ # GFX11: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0xff,0x87,0x0e,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: 
v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 
-0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp 
v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# 
GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - # GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -843,72 +597,6 @@ # GFX11: v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0xff,0x03,0x22,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: 
v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -945,66 +633,6 @@ # GFX11: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0xff,0x87,0x13,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f32_e64_dpp v5, 
v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -1053,30 +681,6 @@ # GFX11: v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x15,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_log_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f16_e64_dpp v5, v1 
mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1881,36 +1485,6 @@ # GFX11: v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x63,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] 
-0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1989,18 +1563,6 @@ # GFX11: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0xff,0x87,0x18,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2079,90 +1641,6 @@ # GFX11: v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x44,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# 
GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f16_e64_dpp 
v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2307,60 +1785,6 @@ # GFX11: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x80,0x22,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] 
-0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -2405,30 +1829,6 @@ # GFX11: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xfc,0x02,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_trunc_f16_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt new file mode 100644 index 
0000000000000..4ea57003eeeb9 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt @@ -0,0 +1,601 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_clz_i32_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64_dpp v5, 
v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 
+0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# 
GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] 
+0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# 
GFX11: v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 
+0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: 
v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 
mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 diff 
--git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt new file mode 100644 index 0000000000000..cba7fa924be2c --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt @@ -0,0 +1,3283 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s + +# GFX11: v_bfrev_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, null ; encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00 + +# 
GFX11: v_bfrev_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_ceil_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, null ; encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] 
+0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_ceil_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, null ; encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, -1 ; encoding: 
[0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_ceil_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_ceil_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: 
[0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cls_i32_e64 v5, v1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, v255 ; encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, s1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, s105 ; encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, m0 ; encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, null ; encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, -1 ; encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 
v5, src_scc ; encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_clz_i32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, null ; encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] 
+0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cos_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, null ; encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, 0.5 mul:2 ; encoding: 
[0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cos_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, null ; encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: 
v_cos_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cos_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_ctz_i32_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, null ; encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, -1 ; encoding: 
[0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] 
+0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f16_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00 + 
+# GFX11: v_cvt_f16_i16_e64 v5, null ; encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, 0x3800 mul:2 +0x05,0x00,0xd1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f16_i16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] 
+0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, null ; encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, -1 ; encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, 0x3800 mul:2 +0x05,0x00,0xd0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f16_u16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, s105 ; encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, m0 ; encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, exec_hi ; encoding: 
[0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, null ; encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, -1 ; encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, null ; encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, -1 ; encoding: 
[0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 ; encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_i32_e64 v5, v1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, v255 ; encoding: [0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, s1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, s105 ; encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, m0 ; encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, null ; encoding: 
[0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, -1 ; encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_i32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] 
+0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, null ; encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_u32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte0_e64 v5, v1 ; encoding: [0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, v255 ; encoding: [0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, s1 ; encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, s105 ; encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, m0 ; encoding: [0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_lo ; encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] 
+0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_hi ; encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, null ; encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, -1 ; encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte0_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte1_e64 v5, v1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, v255 ; encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, s1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, s105 ; encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, m0 ; encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] 
+0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_lo ; encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_hi ; encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, null ; encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, -1 ; encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte2_e64 v5, v1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, v255 ; encoding: [0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, s1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, s105 ; encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] 
+0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, m0 ; encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_lo ; encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_hi ; encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, null ; encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, -1 ; encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte3_e64 v5, v1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, v255 ; encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, s1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, s105 ; encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] 
+0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, m0 ; encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_lo ; encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_hi ; encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, null ; encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, -1 ; encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_f32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] 
+0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], null ; encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 ; encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_i32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], s105 ; encoding: 
[0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], null ; encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_u32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], s1 
; encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], null ; encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_floor_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: 
v_cvt_floor_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] 
+0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp ; 
encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] 
+0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp ; encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] +0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20 + +# GFX11: v_cvt_i32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_i32_i16_e64 v5, v1 ; encoding: 
[0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, null ; encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, 0x3800 +0x05,0x00,0xea,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 
v5, v1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v255, 
-|0xaf123456| ; encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_norm_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, 
src_scc ; encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, 
0.5 ; encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, v1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, v255 ; encoding: [0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, s1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, s105 ; encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, m0 ; encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, null ; encoding: [0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, -1 ; encoding: 
[0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_off_f32_i4_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 ; encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] +0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, null ; encoding: 
[0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp ; encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] 
+0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, null ; encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp ; encoding: [0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_u32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, null ; encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00 + +# 
GFX11: v_cvt_u32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] +0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20 + +# GFX11: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, null ; encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, -1 ; encoding: 
[0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, 0x3800 +0x05,0x00,0xeb,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, src_scc ; encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, -1 ; encoding: 
[0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_exp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_exp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, null ; encoding: [0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: 
v_exp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_exp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_floor_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, null ; encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] 
+0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_floor_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, 
null ; encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_floor_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_floor_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], 0.5 mul:2 ; 
encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_floor_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_fract_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, null ; encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] 
+0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_fract_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, null ; encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, 
-1 ; encoding: [0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_fract_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_fract_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_fract_f64_e64 v[5:6], 
-|src_scc| mul:4 ; encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_exp_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00 + +# 
GFX11: v_frexp_exp_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] 
+0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_exp_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, 0.5 ; encoding: 
[0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] +0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20 + +# GFX11: v_frexp_exp_i32_f64_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_mant_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, null ; encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, -1 ; encoding: 
[0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_frexp_mant_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, null ; encoding: 
[0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_frexp_mant_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_mant_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00 + +# 
GFX11: v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_log_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, null ; encoding: [0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, -1 ; encoding: 
[0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_log_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_log_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, null ; encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: 
v_log_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_log_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_log_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_mov_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, v255 ; encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, s1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, s105 ; encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, null ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] 
+0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, null ; 
encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] +0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] +0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] +0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_nop ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] +0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, v1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, v255 ; encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, s1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: 
v_not_b16_e64 v5, s105 ; encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, m0 ; encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, null ; encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, -1 ; encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, 0x3800 +0x05,0x00,0xe9,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, s105 ; encoding: 
[0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, null ; encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_pipeflush ; encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] +0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, s1 ; encoding: 
[0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: 
v_rcp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, null ; encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rcp_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] 
+0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_rcp_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_rcp_iflag_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] 
+0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, null ; encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_iflag_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rndne_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] 
+0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, null ; encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rndne_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, 
s1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, null ; encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rndne_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rndne_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], v[254:255] ; encoding: 
[0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_rsq_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, s105 ; encoding: 
[0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, null ; encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rsq_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: 
v_rsq_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, null ; encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rsq_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rsq_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], s[2:3] ; encoding: 
[0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: 
[0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] 
+0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, null ; encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sin_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, s105 ; encoding: 
[0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, null ; encoding: [0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sin_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sin_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_sqrt_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: 
v_sqrt_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, null ; encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sqrt_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] 
+0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, null ; encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sqrt_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_sqrt_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: 
v_sqrt_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_trunc_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, vcc_lo ; 
encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, null ; encoding: [0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_trunc_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] 
+0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, null ; encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_trunc_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_trunc_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00 + 
+# GFX11: v_trunc_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_trunc_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf From 957eed0b1af2cb88edafe1ff2643a38165c67a40 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Wed, 12 Oct 2022 09:20:05 +0000 Subject: [PATCH 014/516] [InstCombine] Remove redundant splats in InstCombineVectorOps Splatting the first vector element of the result of a BinOp, where any of the BinOp's operands are the result of a first vector element splat can be simplified to splatting the first vector element of the result of the BinOp Differential Revision: https://reviews.llvm.org/D135876 --- .../InstCombine/InstCombineInternal.h | 1 + .../InstCombine/InstCombineVectorOps.cpp | 32 ++- .../Transforms/InstCombine/shuffle-binop.ll | 60 +++-- .../AArch64/insert-shuffle-binop.ll | 
216 ------------------ 4 files changed, 78 insertions(+), 231 deletions(-) delete mode 100644 llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 3f1bcea3727f5..11aed7754c264 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -167,6 +167,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); + Instruction *simplifyBinOpSplats(ShuffleVectorInst &SVI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); Instruction *visitLandingPadInst(LandingPadInst &LI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index d50918629ba5c..6581fe0b9dc91 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2598,6 +2598,34 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { return new ShuffleVectorInst(X, Y, NewMask); } +// Splatting the first element of the result of a BinOp, where any of the +// BinOp's operands are the result of a first element splat can be simplified to +// splatting the first element of the result of the BinOp +Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) { + if (!SVI.isZeroEltSplat()) + return nullptr; + + Value *Op0 = SVI.getOperand(0); + Value *X, *Y; + if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()), + m_Value(Y))) && + !match(Op0, m_BinOp(m_Value(X), + m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask())))) + return nullptr; + if (X->getType() != Y->getType()) + 
return nullptr; + + auto *BinOp = cast(Op0); + if (!isSafeToSpeculativelyExecute(BinOp)) + return nullptr; + + Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y); + if (auto NewBOI = dyn_cast(NewBO)) + NewBOI->copyIRFlags(BinOp); + + return new ShuffleVectorInst(NewBO, SVI.getShuffleMask()); +} + Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -2606,7 +2634,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SVI.getType(), ShufQuery)) return replaceInstUsesWith(SVI, V); - // Bail out for scalable vectors + if (Instruction *I = simplifyBinOpSplats(SVI)) + return I; + if (isa(LHS->getType())) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/shuffle-binop.ll b/llvm/test/Transforms/InstCombine/shuffle-binop.ll index fe2d1af5a04f3..c26c293c695af 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-binop.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-binop.ll @@ -50,13 +50,13 @@ define <4 x i8> @splat_binop_splat_x(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = add <4 x i8> [[XSPLAT]], [[Y:%.*]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i8> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %xsplat) - %b = add <4 x i8> %xsplat, %y + %b = add nsw <4 x i8> %xsplat, %y %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } @@ -65,14 +65,14 @@ define <4 x 
i8> @splat_binop_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = sub <4 x i8> [[X:%.*]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i8> [[X:%.*]], [[Y]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %ysplat) %b = sub <4 x i8> %x, %ysplat - %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer + %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> ret <4 x i8> %bsplat } @@ -82,21 +82,40 @@ define <4 x i8> @splat_binop_splat_x_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[XSPLAT]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i8> [[Y]], [[X]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %xsplat) %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %ysplat) - %b = mul <4 x i8> %xsplat, %ysplat + %b = mul nuw <4 x i8> %xsplat, %ysplat %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> 
%bsplat } -define @vscale_splat_binop_splat_x( %x, %y) { -; CHECK-LABEL: @vscale_splat_binop_splat_x( +define <4 x float> @splat_binop_splat_x_splat_y_fmath_flags(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: @splat_binop_splat_x_splat_y_fmath_flags( +; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: call void @use(<4 x float> [[XSPLAT]]) +; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: call void @use(<4 x float> [[YSPLAT]]) +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[Y]], [[X]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[BSPLAT]] +; + %xsplat = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> zeroinitializer + call void @use(<4 x float> %xsplat) + %ysplat = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> zeroinitializer + call void @use(<4 x float> %ysplat) + %b = fmul fast <4 x float> %xsplat, %ysplat + %bsplat = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %bsplat +} + +define @vscale_splat_udiv_splat_x( %x, %y) { +; CHECK-LABEL: @vscale_splat_udiv_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer ; CHECK-NEXT: [[B:%.*]] = udiv [[XSPLAT]], [[Y:%.*]] ; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer @@ -108,6 +127,19 @@ define @vscale_splat_binop_splat_x( %x, %bsplat } +define @vscale_splat_urem_splat_x( %x, %y) { +; CHECK-LABEL: @vscale_splat_urem_splat_x( +; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer +; CHECK-NEXT: [[B:%.*]] = urem [[XSPLAT]], [[Y:%.*]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer +; CHECK-NEXT: ret [[BSPLAT]] +; + %xsplat = shufflevector %x, poison, zeroinitializer + %b = 
urem %xsplat, %y + %bsplat = shufflevector %b, poison, zeroinitializer + ret %bsplat +} + define @vscale_splat_binop_splat_y( %x, %y) { ; CHECK-LABEL: @vscale_splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer @@ -140,8 +172,8 @@ define @vscale_splat_binop_splat_x_splat_y_calls( [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer ; CHECK-NEXT: call void @use_v( [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = lshr [[XSPLAT]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = lshr [[X]], [[Y]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer ; CHECK-NEXT: ret [[BSPLAT]] ; %xsplat = shufflevector %x, poison, zeroinitializer @@ -154,4 +186,4 @@ define @vscale_splat_binop_splat_x_splat_y_calls() -declare void @use_v() \ No newline at end of file +declare void @use_v() diff --git a/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll b/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll deleted file mode 100644 index c75f53bc68583..0000000000000 --- a/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll +++ /dev/null @@ -1,216 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='vector-combine' -S %s | FileCheck %s - -target triple = "aarch64-none-eabi" - -define @fadd_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector 
[[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fadd fast %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fadd fast <4 x float> %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, 
float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fsub fast %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fsub fast <4 x float> %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fadd_vscale_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fadd fast %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fadd fast <4 x float> %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: 
@fsub_vscale_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fsub fast %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector <4 x float> %r, <4 x float> 
poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fadd_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer - %r = fadd fast %broadcast.splat, %broadcast.splat2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x 
float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %broadcast.splat2 = shufflevector <4 x float> %broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer - %r = fadd fast <4 x float> %broadcast.splat, %broadcast.splat2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer - %r = fsub fast %broadcast.splat, %broadcast.splat2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %broadcast.splat2 = shufflevector <4 x float> %broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer - %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splat2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} From d7d743621a0d5d13ed54d358944857ccba598299 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 2 Nov 2022 07:56:43 -0400 Subject: [PATCH 015/516] Reenable POSIX builtin library functions in gnu2x mode gnu17 and earlier modes automatically expose several POSIX C APIs, and this was accidentally disabled for gnu2x in 7d644e1215b376ec5e915df9ea2eeb56e2d94626. This restores the behavior for gnu2x mode (without changing the behavior in C standards modes instead of GNU modes). 
Fixes #56607 --- clang/docs/ReleaseNotes.rst | 4 ++++ clang/lib/Sema/SemaLookup.cpp | 8 +++----- clang/test/Sema/gnu-builtins.c | 13 +++++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 clang/test/Sema/gnu-builtins.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7697f10daeef0..1198926974bff 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -274,6 +274,10 @@ Bug Fixes result in a stack overflow. `Issue 44304 <https://github.com/llvm/llvm-project/issues/44304>`_ `Issue 50891 <https://github.com/llvm/llvm-project/issues/50891>`_ +- Clang 14 predeclared some builtin POSIX library functions in ``gnu2x`` mode, + and Clang 15 accidentally stopped predeclaring those functions in that + language mode. Clang 16 now predeclares those functions again. This fixes + `Issue 56607 <https://github.com/llvm/llvm-project/issues/56607>`_. Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 845fa2f56df2f..39e88bccfef64 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -941,11 +941,9 @@ bool Sema::LookupBuiltin(LookupResult &R) { // If this is a builtin on this (or all) targets, create the decl. if (unsigned BuiltinID = II->getBuiltinID()) { - // In C++, C2x, and OpenCL (spec v1.2 s6.9.f), we don't have any - // predefined library functions like 'malloc'. Instead, we'll just - // error. - if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL || - getLangOpts().C2x) && + // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined + // library functions like 'malloc'. Instead, we'll just error.
+ if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL) && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return false; diff --git a/clang/test/Sema/gnu-builtins.c b/clang/test/Sema/gnu-builtins.c new file mode 100644 index 0000000000000..c4da8b39363cd --- /dev/null +++ b/clang/test/Sema/gnu-builtins.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu17 %s +// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu2x %s +// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c17 %s +// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c2x %s + +// std-no-diagnostics + +// 'index' is a builtin library function, but only in GNU mode. So this should +// give an error in GNU modes but be okay in non-GNU mode. +// FIXME: the error is correct, but these notes are pretty awful. +int index; // gnu-error {{redefinition of 'index' as different kind of symbol}} \ + gnu-note {{unguarded header; consider using #ifdef guards or #pragma once}} \ + gnu-note {{previous definition is here}} From d839f654586a4f3a84b334fcc2c986343a1d7f98 Mon Sep 17 00:00:00 2001 From: Philip Pfaffe Date: Wed, 2 Nov 2022 10:51:18 +0000 Subject: [PATCH 016/516] [wasm] Always treat DWARF expression addresses as load addresses When resolving absolute addresses for DW_OP_addr or DW_OP_addrx, these are always load addresses rather than file addresses in wasm. 
Reviewed By: DavidSpickett Differential Revision: https://reviews.llvm.org/D135664 --- lldb/source/Expression/DWARFExpression.cpp | 30 +++-- .../Expression/DWARFExpressionTest.cpp | 113 ++++++++++++++++++ 2 files changed, 136 insertions(+), 7 deletions(-) diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 1ccda944cd013..3f302e53c00e1 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -847,10 +847,12 @@ bool DWARFExpression::Evaluate( Process *process = nullptr; StackFrame *frame = nullptr; + Target *target = nullptr; if (exe_ctx) { process = exe_ctx->GetProcessPtr(); frame = exe_ctx->GetFramePtr(); + target = exe_ctx->GetTargetPtr(); } if (reg_ctx == nullptr && frame) reg_ctx = frame->GetRegisterContext().get(); @@ -906,12 +908,19 @@ bool DWARFExpression::Evaluate( // address and whose size is the size of an address on the target machine. case DW_OP_addr: stack.push_back(Scalar(opcodes.GetAddress(&offset))); - stack.back().SetValueType(Value::ValueType::FileAddress); - // Convert the file address to a load address, so subsequent - // DWARF operators can operate on it. - if (frame) - stack.back().ConvertToLoadAddress(module_sp.get(), - frame->CalculateTarget().get()); + if (target && + target->GetArchitecture().GetCore() == ArchSpec::eCore_wasm32) { + // wasm file sections aren't mapped into memory, therefore addresses can + // never point into a file section and are always LoadAddresses. + stack.back().SetValueType(Value::ValueType::LoadAddress); + } else { + stack.back().SetValueType(Value::ValueType::FileAddress); + // Convert the file address to a load address, so subsequent + // DWARF operators can operate on it. 
+ if (frame) + stack.back().ConvertToLoadAddress(module_sp.get(), + frame->CalculateTarget().get()); + } break; // The DW_OP_addr_sect_offset4 is used for any location expressions in @@ -2507,7 +2516,14 @@ bool DWARFExpression::Evaluate( uint64_t index = opcodes.GetULEB128(&offset); lldb::addr_t value = dwarf_cu->ReadAddressFromDebugAddrSection(index); stack.push_back(Scalar(value)); - stack.back().SetValueType(Value::ValueType::FileAddress); + if (target && + target->GetArchitecture().GetCore() == ArchSpec::eCore_wasm32) { + // wasm file sections aren't mapped into memory, therefore addresses can + // never point into a file section and are always LoadAddresses. + stack.back().SetValueType(Value::ValueType::LoadAddress); + } else { + stack.back().SetValueType(Value::ValueType::FileAddress); + } } break; // OPCODE: DW_OP_GNU_const_index diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index 35a064fc14bd8..4251eb0aecda9 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -8,6 +8,7 @@ #include "lldb/Expression/DWARFExpression.h" #include "Plugins/Platform/Linux/PlatformLinux.h" +#include "Plugins/SymbolFile/DWARF/DWARFDebugInfo.h" #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" #include "TestingSupport/Symbol/YAMLModuleTester.h" #include "lldb/Core/Debugger.h" @@ -401,3 +402,115 @@ TEST_F(DWARFExpressionMockProcessTest, DW_OP_deref) { Evaluate({DW_OP_lit4, DW_OP_deref, DW_OP_stack_value}, {}, {}, &exe_ctx), llvm::HasValue(GetScalar(32, 0x07060504, false))); } + +TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr) { + // Set up a wasm target + ArchSpec arch("wasm32-unknown-unknown-wasm"); + lldb::PlatformSP host_platform_sp = + platform_linux::PlatformLinux::CreateInstance(true, &arch); + ASSERT_TRUE(host_platform_sp); + Platform::SetHostPlatform(host_platform_sp); + lldb::DebuggerSP debugger_sp = Debugger::CreateInstance(); 
+ ASSERT_TRUE(debugger_sp); + lldb::TargetSP target_sp; + lldb::PlatformSP platform_sp; + debugger_sp->GetTargetList().CreateTarget(*debugger_sp, "", arch, + lldb_private::eLoadDependentsNo, + platform_sp, target_sp); + + ExecutionContext exe_ctx(target_sp, false); + // DW_OP_addr takes a single operand of address size width: + uint8_t expr[] = {DW_OP_addr, 0x40, 0x0, 0x0, 0x0}; + DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle, + /*addr_size*/ 4); + Value result; + Status status; + ASSERT_TRUE(DWARFExpression::Evaluate( + &exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor, + /*unit*/ nullptr, lldb::eRegisterKindLLDB, + /*initial_value_ptr*/ nullptr, + /*object_address_ptr*/ nullptr, result, &status)) + << status.ToError(); + + ASSERT_EQ(result.GetValueType(), Value::ValueType::LoadAddress); +} + +TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr_index) { + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +DWARF: + debug_abbrev: + - Table: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_addr_base + Form: DW_FORM_sec_offset + + debug_info: + - Version: 5 + AddrSize: 4 + UnitType: DW_UT_compile + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x8 # Offset of the first Address past the header + - AbbrCode: 0x0 + + debug_addr: + - Version: 5 + AddressSize: 4 + Entries: + - Address: 0x1234 + - Address: 0x5678 +)"; + + // Can't use DWARFExpressionTester from above because subsystems overlap with + // the fixture. 
+ SubsystemRAII subsystems; + llvm::Expected file = TestFile::fromYaml(yamldata); + EXPECT_THAT_EXPECTED(file, llvm::Succeeded()); + auto module_sp = std::make_shared(file->moduleSpec()); + auto *dwarf_cu = llvm::cast(module_sp->GetSymbolFile()) + ->DebugInfo() + .GetUnitAtIndex(0); + ASSERT_TRUE(dwarf_cu); + dwarf_cu->ExtractDIEsIfNeeded(); + + // Set up a wasm target + ArchSpec arch("wasm32-unknown-unknown-wasm"); + lldb::PlatformSP host_platform_sp = + platform_linux::PlatformLinux::CreateInstance(true, &arch); + ASSERT_TRUE(host_platform_sp); + Platform::SetHostPlatform(host_platform_sp); + lldb::DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + lldb::TargetSP target_sp; + lldb::PlatformSP platform_sp; + debugger_sp->GetTargetList().CreateTarget(*debugger_sp, "", arch, + lldb_private::eLoadDependentsNo, + platform_sp, target_sp); + + ExecutionContext exe_ctx(target_sp, false); + // DW_OP_addrx takes a single leb128 operand, the index in the addr table: + uint8_t expr[] = {DW_OP_addrx, 0x01}; + DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle, + /*addr_size*/ 4); + Value result; + Status status; + ASSERT_TRUE(DWARFExpression::Evaluate( + &exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor, dwarf_cu, + lldb::eRegisterKindLLDB, + /*initial_value_ptr*/ nullptr, + /*object_address_ptr*/ nullptr, result, &status)) + << status.ToError(); + + ASSERT_EQ(result.GetValueType(), Value::ValueType::LoadAddress); + ASSERT_EQ(result.GetScalar().UInt(), 0x5678u); +} From 6f77979a4b1348d136995644c1f4ad37b50d6580 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 2 Nov 2022 08:14:44 -0400 Subject: [PATCH 017/516] [InstCombine] add tests for logical-and / logical-or folds; NFC Similar to 29661fe94bf12ced1 - there are matching deficiencies and a potential crash lurking in these patterns. 
--- .../InstCombine/select-safe-transforms.ll | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index dba59931235de..d34193bbe31e9 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -693,6 +693,23 @@ define i1 @orn_and_cmp_1_partial_logical(i37 %a, i37 %b, i1 %y) { ; CHECK-NEXT: [[OR:%.*]] = select i1 [[X_INV]], i1 true, i1 [[AND]] ; CHECK-NEXT: ret i1 [[OR]] ; + %x = icmp sgt i37 %a, %b + %x_inv = icmp sle i37 %a, %b + %and = and i1 %x, %y + %or = select i1 %x_inv, i1 true, i1 %and + ret i1 %or +} + +define i1 @orn_and_cmp_1_partial_logical_commute(i37 %a, i37 %b) { +; CHECK-LABEL: @orn_and_cmp_1_partial_logical_commute( +; CHECK-NEXT: [[Y:%.*]] = call i1 @gen1() +; CHECK-NEXT: [[X:%.*]] = icmp sgt i37 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A]], [[B]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X]] +; CHECK-NEXT: [[OR:%.*]] = select i1 [[X_INV]], i1 true, i1 [[AND]] +; CHECK-NEXT: ret i1 [[OR]] +; + %y = call i1 @gen1() ; thwart complexity-based canonicalization %x = icmp sgt i37 %a, %b %x_inv = icmp sle i37 %a, %b %and = and i1 %y, %x @@ -721,9 +738,37 @@ define i1 @orn_and_cmp_2_partial_logical(i16 %a, i16 %b, i1 %y) { ; CHECK-NEXT: [[OR:%.*]] = or i1 [[X_INV]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[OR]] ; + %x = icmp sge i16 %a, %b + %x_inv = icmp slt i16 %a, %b + %and = and i1 %x, %y + %or = select i1 %and, i1 true, i1 %x_inv + ret i1 %or +} + +define i1 @orn_and_cmp_2_partial_logical_commute(i16 %a, i16 %b) { +; CHECK-LABEL: @orn_and_cmp_2_partial_logical_commute( +; CHECK-NEXT: [[Y:%.*]] = call i1 @gen1() +; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]] +; CHECK-NEXT: ret i1 [[OR]] +; + %y = call i1 @gen1() ; thwart complexity-based 
canonicalization %x = icmp sge i16 %a, %b %x_inv = icmp slt i16 %a, %b %and = and i1 %y, %x %or = select i1 %and, i1 true, i1 %x_inv ret i1 %or } + +define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { +; CHECK-LABEL: @not_logical_and2( +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: ret <2 x i1> [[OR]] +; + %cond = icmp ult <2 x i32> %a, + %implied = icmp ugt <2 x i32> %a, + %and = select i1 %b, <2 x i1> %cond, <2 x i1> zeroinitializer + %or = select <2 x i1> %and, <2 x i1> , <2 x i1> %implied + ret <2 x i1> %or +} From b24e2f6ef6704652f52e1dc24e4e1cae5144fb7a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 2 Nov 2022 08:21:42 -0400 Subject: [PATCH 018/516] [InstCombine] use logical-and matcher to avoid crash Follow-on to: ec0b406e16c44f1554 This should prevent crashing for example like issue #58552 by not matching a select-of-vectors-with-scalar-condition. The test that shows a regression seems unlikely to occur in real code. This also picks up an optimization in the case where a real (bitwise) logic op is used. We could already convert some similar select ops to real logic via impliesPoison(), so we don't see more diffs on commuted tests. Using commutative matchers (when safe) might also handle one of the TODO tests. 
--- .../InstCombine/InstCombineSelect.cpp | 9 +++-- .../InstCombine/select-safe-transforms.ll | 33 +++++++++++++++---- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5df7459e49851..f4ad343a614e4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2784,17 +2784,16 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { if (Res && *Res == false) return replaceOperand(SI, 1, A); } - // select c, true, (select a, b, false) -> select c, true, a - // select (select a, b, false), true, c -> select a, true, c + // select c, true, (a && b) -> select c, true, a + // select (a && b), true, c -> select a, true, c // if c = false implies that b = true - // FIXME: This should use m_LogicalAnd instead of matching a select operand. if (match(TrueVal, m_One()) && - match(FalseVal, m_Select(m_Value(A), m_Value(B), m_Zero()))) { + match(FalseVal, m_LogicalAnd(m_Value(A), m_Value(B)))) { Optional Res = isImpliedCondition(CondVal, B, DL, false); if (Res && *Res == true) return replaceOperand(SI, 2, A); } - if (match(CondVal, m_Select(m_Value(A), m_Value(B), m_Zero())) && + if (match(CondVal, m_LogicalAnd(m_Value(A), m_Value(B))) && match(TrueVal, m_One())) { Optional Res = isImpliedCondition(FalseVal, B, DL, false); if (Res && *Res == true) diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index d34193bbe31e9..e5c313c361d59 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -685,6 +685,8 @@ define i1 @orn_and_cmp_1_logical(i37 %a, i37 %b, i1 %y) { ret i1 %or } +; TODO: This should fold the same way as the next test. 
+ define i1 @orn_and_cmp_1_partial_logical(i37 %a, i37 %b, i1 %y) { ; CHECK-LABEL: @orn_and_cmp_1_partial_logical( ; CHECK-NEXT: [[X:%.*]] = icmp sgt i37 [[A:%.*]], [[B:%.*]] @@ -703,10 +705,8 @@ define i1 @orn_and_cmp_1_partial_logical(i37 %a, i37 %b, i1 %y) { define i1 @orn_and_cmp_1_partial_logical_commute(i37 %a, i37 %b) { ; CHECK-LABEL: @orn_and_cmp_1_partial_logical_commute( ; CHECK-NEXT: [[Y:%.*]] = call i1 @gen1() -; CHECK-NEXT: [[X:%.*]] = icmp sgt i37 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A]], [[B]] -; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X]] -; CHECK-NEXT: [[OR:%.*]] = select i1 [[X_INV]], i1 true, i1 [[AND]] +; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = select i1 [[X_INV]], i1 true, i1 [[Y]] ; CHECK-NEXT: ret i1 [[OR]] ; %y = call i1 @gen1() ; thwart complexity-based canonicalization @@ -760,10 +760,31 @@ define i1 @orn_and_cmp_2_partial_logical_commute(i16 %a, i16 %b) { ret i1 %or } +; PR58552 - this would crash trying to replace non-matching types + +define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) { +; CHECK-LABEL: @not_logical_and( +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[IMPLIED]], <2 x i1> , <2 x i1> [[AND]] +; CHECK-NEXT: ret <2 x i1> [[OR]] +; + %cond = icmp ult <2 x i32> %a, + %implied = icmp ugt <2 x i32> %a, + %and = select i1 %b, <2 x i1> %cond, <2 x i1> zeroinitializer + %or = select <2 x i1> %implied, <2 x i1> , <2 x i1> %and + ret <2 x i1> %or +} + +; This could reduce, but we do not match select-of-vectors with scalar condition as logical-and. 
+ define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { ; CHECK-LABEL: @not_logical_and2( -; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[AND]], <2 x i1> , <2 x i1> [[IMPLIED]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; %cond = icmp ult <2 x i32> %a, From dd927f47e78bfdd89b62d5ef1ccd4b0a9b05386f Mon Sep 17 00:00:00 2001 From: Dmitry Makogon Date: Wed, 2 Nov 2022 19:55:56 +0700 Subject: [PATCH 019/516] [Test] Add test exposing crash in SimpleLoopUnswitch The test crashes with an assert in unswitchNontrivialInvariants. --- ...trivial-unswitch-skip-selects-in-guards.ll | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll new file mode 100644 index 0000000000000..7f8862e160a02 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll @@ -0,0 +1,36 @@ +; RUN: opt -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s + +declare ptr @pluto() +declare void @llvm.experimental.guard(i1, ...) 
+declare void @widget() + +; REQUIRES: asserts +; XFAIL: * + +define void @foo(ptr addrspace(1) %arg, i64 %arg1) personality ptr @pluto { +bb: + %tmp = icmp slt i32 poison, 570 + %tmp2 = select i1 %tmp, i1 true, i1 false + br label %bb3 + +bb3: ; preds = %bb6, %bb + call void (i1, ...) @llvm.experimental.guard(i1 %tmp2, i32 7) [ "deopt"() ] + invoke void @widget() + to label %bb4 unwind label %bb7 + +bb4: ; preds = %bb3 + invoke void @widget() + to label %bb6 unwind label %bb7 + +bb6: ; preds = %bb4 + invoke void @widget() + to label %bb3 unwind label %bb7 + +bb7: ; preds = %bb6, %bb4, %bb3 + %tmp8 = landingpad { ptr, i32 } + cleanup + ret void +} + From 534638eae31a51eca76de8b829b81e6c1fee083b Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 2 Nov 2022 12:36:54 +0100 Subject: [PATCH 020/516] [mlir][linalg] Fix crash in canonicalization pattern This crash was due to incorrect usage of `hasTensorSemantics`, which has changed recently with DestinationStyleOpInterface. An op has tensor semantics if all of its inits and inputs are tensors. Previously, only inits needed to be tensors. Differential Revision: https://reviews.llvm.org/D137243 --- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 7 +++---- mlir/test/Dialect/Linalg/canonicalize.mlir | 24 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index ae2514c3eecdb..568b9317ca364 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -935,10 +935,9 @@ struct DeduplicateAndRemoveDeadOperandsAndResults // Create the new op with the body being empty. 
Location loc = genericOp.getLoc(); SmallVector newResultTypes; - if (genericOp.hasTensorSemantics()) { - newResultTypes = llvm::to_vector(llvm::map_range( - newOutputOperands, [](Value v) { return v.getType(); })); - } + for (Value v : newOutputOperands) + if (v.getType().isa()) + newResultTypes.push_back(v.getType()); auto newOp = rewriter.create( loc, newResultTypes, newInputOperands, newOutputOperands, rewriter.getAffineMapArrayAttr(newIndexingMaps), diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 8f8f6000966e2..3f1118334ef5c 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -846,3 +846,27 @@ func.func @identity_mixed(%arg0 : tensor, %arg1: memref) { // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: } ins(%[[ARG1]] : tensor) // CHECK-SAME: outs(%[[ARG2]] : memref) { + +// ----- + +// Just make sure that we don't crash. + +// CHECK-LABEL: func @dedeplicate_regression_test +func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: memref<4xf32>) { + %36 = linalg.generic + {indexing_maps = [affine_map<(d0) -> (d0)>, + affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"]} + ins(%1, %1 : memref<4xf32>, memref<4xf32>) + outs(%0 : tensor<4xf32>) { + ^bb0(%in: f32, %in_24: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<4xf32> + %53 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"]} + outs(%36 : tensor<4xf32>) { + ^bb0(%out: f32): + linalg.yield %out : f32 + } -> tensor<4xf32> + return +} From 5fe9273c73969791795e5302933abadc9f33f09a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 14:21:38 +0100 Subject: [PATCH 021/516] [BasicAA] Re-enable cs-cs-arm.ll test (PR58738) Fixes https://github.com/llvm/llvm-project/issues/58738. 
--- llvm/test/Analysis/BasicAA/cs-cs-arm.ll | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll index d6a9976590778..6bf321b09201a 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll @@ -1,8 +1,5 @@ ; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; REQUIRES: arm-registered-target -; This hasn't been run in a long time and it no longer matches reality. -; Filed issue #58738. -; XFAIL: * target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "arm-apple-ios" @@ -17,16 +14,18 @@ entry: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b + load i8, i8* %p + load i8, i8* %q ret <8 x i16> %c ; CHECK-LABEL: Function: test1: ; CHECK: NoAlias: i8* %p, i8* %q -; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: 
i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) From 88d34d46260e62ee4640b4fa50d3f3e70c75f198 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Thu, 20 Oct 2022 15:40:14 +0100 Subject: [PATCH 022/516] [DebugInfo] Fix minor debug info bug in deleteDeadLoop Using a DebugVariable as the set key rather than std::pair ensures we don't accidently confuse multiple instances of inlined variables. Reviewed By: jryans Differential Revision: https://reviews.llvm.org/D133303 --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 7 +- .../Generic/loop-deletion-inline-var.ll | 103 ++++++++++++++++++ 2 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index c007a5990f337..636392ae810b7 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -594,7 +594,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } // Use a map to unique and a vector to guarantee deterministic ordering. 
- llvm::SmallDenseSet, 4> DeadDebugSet; + llvm::SmallDenseSet DeadDebugSet; llvm::SmallVector DeadDebugInst; if (ExitBlock) { @@ -623,11 +623,8 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, auto *DVI = dyn_cast(&I); if (!DVI) continue; - auto Key = - DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); - if (Key != DeadDebugSet.end()) + if (!DeadDebugSet.insert(DebugVariable(DVI)).second) continue; - DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); DeadDebugInst.push_back(DVI); } diff --git a/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll b/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll new file mode 100644 index 0000000000000..372fc31681943 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll @@ -0,0 +1,103 @@ +; RUN: opt -S %s -passes=loop-deletion | FileCheck %s + +;; Generated from this C source: +;; static int f(int p) { return p * p * 2; } +;; static int zero() { return 0; } +;; void fun() { +;; for (int __attribute__((nodebug)) i = zero(); i < 0; ++i) { +;; f(i); +;; f(i + 1); +;; } +;; } +;; +;; Check that loop-deletion doesn't accidentally mistake debug intrinsics for +;; different inlined instances of a variable as the same variable.
+ +; CHECK-LABEL: for.cond.cleanup: ; preds = %entry +; CHECK-NEXT: @llvm.dbg.value({{.+}}, metadata ![[P:[0-9]+]],{{.+}}), !dbg ![[DBG1:[0-9]+]] +; CHECK-NEXT: @llvm.dbg.value({{.+}}, metadata ![[P]], {{.+}}), !dbg ![[DBG2:[0-9]+]] + +; CHECK-DAG: ![[P]] = !DILocalVariable(name: "p", +; CHECK-DAG: ![[DBG1]] = !DILocation({{.+}}, inlinedAt: ![[IA1:[0-9]+]]) +; CHECK-DAG: ![[DBG2]] = !DILocation({{.+}}, inlinedAt: ![[IA2:[0-9]+]]) +; CHECK-DAG: ![[IA1]] = distinct !DILocation(line: 5, +; CHECK-DAG: ![[IA2]] = distinct !DILocation(line: 6, + +define dso_local void @fun() !dbg !9 { +entry: + br label %for.cond, !dbg !13 + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ], !dbg !15 + %cmp = icmp slt i32 %i.0, 0, !dbg !16 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !18 + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + call void @llvm.dbg.value(metadata i32 %i.0, metadata !19, metadata !DIExpression()), !dbg !25 + %mul.i = mul nsw i32 %i.0, %i.0, !dbg !28 + %mul1.i = mul nsw i32 %mul.i, 2, !dbg !29 + %add = add nsw i32 %i.0, 1, !dbg !30 + call void @llvm.dbg.value(metadata i32 %add, metadata !19, metadata !DIExpression()), !dbg !31 + %mul.i1 = mul nsw i32 %add, %add, !dbg !33 + %mul1.i2 = mul nsw i32 %mul.i1, 2, !dbg !34 + br label %for.inc, !dbg !35 + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1, !dbg !36 + br label %for.cond, !dbg !37, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !41 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{i32 7, !"Dwarf Version", 
i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{!"clang version 16.0.0"} +!9 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 3, type: !10, scopeLine: 3, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!10 = !DISubroutineType(types: !11) +!11 = !{null} +!12 = !{} +!13 = !DILocation(line: 4, column: 8, scope: !14) +!14 = distinct !DILexicalBlock(scope: !9, file: !1, line: 4, column: 3) +!15 = !DILocation(line: 4, scope: !14) +!16 = !DILocation(line: 4, column: 51, scope: !17) +!17 = distinct !DILexicalBlock(scope: !14, file: !1, line: 4, column: 3) +!18 = !DILocation(line: 4, column: 3, scope: !14) +!19 = !DILocalVariable(name: "p", arg: 1, scope: !20, file: !1, line: 1, type: !23) +!20 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !21, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !24) +!21 = !DISubroutineType(types: !22) +!22 = !{!23, !23} +!23 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!24 = !{!19} +!25 = !DILocation(line: 0, scope: !20, inlinedAt: !26) +!26 = distinct !DILocation(line: 5, column: 5, scope: !27) +!27 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 61) +!28 = !DILocation(line: 1, column: 32, scope: !20, inlinedAt: !26) +!29 = !DILocation(line: 1, column: 36, scope: !20, inlinedAt: !26) +!30 = !DILocation(line: 6, column: 8, scope: !27) +!31 = !DILocation(line: 0, scope: !20, inlinedAt: !32) +!32 = distinct !DILocation(line: 6, column: 5, scope: !27) +!33 = !DILocation(line: 1, column: 32, scope: !20, inlinedAt: !32) +!34 = !DILocation(line: 1, column: 36, scope: !20, inlinedAt: !32) +!35 = !DILocation(line: 7, column: 3, scope: !27) +!36 = 
!DILocation(line: 4, column: 56, scope: !17) +!37 = !DILocation(line: 4, column: 3, scope: !17) +!38 = distinct !{!38, !18, !39, !40} +!39 = !DILocation(line: 7, column: 3, scope: !14) +!40 = !{!"llvm.loop.mustprogress"} +!41 = !DILocation(line: 8, column: 1, scope: !9) From f03b069c5b70b59a9cb391a4c41250083aa6b2b4 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 2 Nov 2022 09:15:11 -0400 Subject: [PATCH 023/516] [InstCombine] fold mul with decremented "shl -1" factor (2nd try) This is a corrected version of: bc886e9b587b I made a copy-paste error that created an "add" instead of the intended "sub" on that attempt. The regression tests showed the bug, but I overlooked that. As I said in a comment on issue #58717, the bug reports resulting from the botched patch confirm that the pattern does occur in many real-world applications, so hopefully eliminating the multiply results in better code. I added one more regression test in this version of the patch, and here's an Alive2 proof to show that exact example: https://alive2.llvm.org/ce/z/dge7VC Original commit message: This is a sibling to: 6064e92b0a84 ...but we canonicalize the shl+add to shl+xor, so the pattern is different than I expected: https://alive2.llvm.org/ce/z/8CX16e I have not found any patterns that are safe to propagate no-wrap, so that is not included here. 
Differential Revision: https://reviews.llvm.org/D137157 --- .../InstCombine/InstCombineMulDivRem.cpp | 13 +++++- llvm/test/Transforms/InstCombine/mul.ll | 44 ++++++++++--------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index e4fccda750e6c..abc88e35cf2af 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -140,7 +140,7 @@ static Value *foldMulSelectToNegate(BinaryOperator &I, return nullptr; } -/// Reduce integer multiplication patterns that contain a (1 << Z) factor. +/// Reduce integer multiplication patterns that contain a (+/-1 << Z) factor. /// Callers are expected to call this twice to handle commuted patterns. static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands, InstCombiner::BuilderTy &Builder) { @@ -171,6 +171,17 @@ static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands, return Builder.CreateAdd(Shl, FrX, Mul.getName(), HasNUW, PropagateNSW); } + // Similar to above, but a decrement of the shifted value is disguised as + // 'not' and becomes a sub: + // X * (~(-1 << Z)) --> X * ((1 << Z) - 1) --> (X << Z) - X + // This increases uses of X, so it may require a freeze, but that is still + // expected to be an improvement because it removes the multiply. 
+ if (match(Y, m_OneUse(m_Not(m_OneUse(m_Shl(m_AllOnes(), m_Value(Z))))))) { + Value *FrX = Builder.CreateFreeze(X, X->getName() + ".fr"); + Value *Shl = Builder.CreateShl(FrX, Z, "mulshl"); + return Builder.CreateSub(Shl, FrX, Mul.getName()); + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 5f0e6ceac449d..4cb2468b08753 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -227,12 +227,14 @@ define i32 @shl1_increment_use(i32 %x, i32 %y) { ret i32 %m } +; ((-1 << x) ^ -1) * y --> (y << x) - y + define i8 @shl1_decrement(i8 %x, i8 %y) { ; CHECK-LABEL: @shl1_decrement( -; CHECK-NEXT: [[POW2X:%.*]] = shl i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[POW2X]], -1 -; CHECK-NEXT: [[M:%.*]] = mul i8 [[X1]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[Y_FR:%.*]] = freeze i8 [[Y:%.*]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y_FR]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y_FR]] +; CHECK-NEXT: ret i8 [[M1]] ; %pow2x = shl i8 -1, %x %x1 = xor i8 %pow2x, -1 @@ -243,10 +245,9 @@ define i8 @shl1_decrement(i8 %x, i8 %y) { define i8 @shl1_decrement_commute(i8 %x, i8 noundef %p) { ; CHECK-LABEL: @shl1_decrement_commute( ; CHECK-NEXT: [[Y:%.*]] = ashr i8 [[P:%.*]], 1 -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[NOTMASK]], -1 -; CHECK-NEXT: [[M:%.*]] = mul i8 [[Y]], [[X1]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y]] +; CHECK-NEXT: ret i8 [[M1]] ; %y = ashr i8 %p, 1 ; thwart complexity-based canonicalization %pow2x = shl i8 1, %x @@ -257,10 +258,10 @@ define i8 @shl1_decrement_commute(i8 %x, i8 noundef %p) { define i8 @shl1_nuw_decrement(i8 %x, i8 %y) { ; CHECK-LABEL: @shl1_nuw_decrement( -; CHECK-NEXT: [[POW2X:%.*]] = shl i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[POW2X]], 
-1 -; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[X1]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[Y_FR:%.*]] = freeze i8 [[Y:%.*]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y_FR]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y_FR]] +; CHECK-NEXT: ret i8 [[M1]] ; %pow2x = shl i8 -1, %x %x1 = xor i8 %pow2x, -1 @@ -270,10 +271,10 @@ define i8 @shl1_nuw_decrement(i8 %x, i8 %y) { define i8 @shl1_nsw_decrement(i8 %x, i8 %y) { ; CHECK-LABEL: @shl1_nsw_decrement( -; CHECK-NEXT: [[POW2X:%.*]] = shl nsw i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[POW2X]], -1 -; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X1]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[Y_FR:%.*]] = freeze i8 [[Y:%.*]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y_FR]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y_FR]] +; CHECK-NEXT: ret i8 [[M1]] ; %pow2x = shl nsw i8 -1, %x %x1 = xor i8 %pow2x, -1 @@ -281,6 +282,8 @@ define i8 @shl1_nsw_decrement(i8 %x, i8 %y) { ret i8 %m } +; negative test - extra use would require more instructions + define i32 @shl1_decrement_use(i32 %x, i32 %y) { ; CHECK-LABEL: @shl1_decrement_use( ; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[X:%.*]] @@ -296,12 +299,13 @@ define i32 @shl1_decrement_use(i32 %x, i32 %y) { ret i32 %m } +; the fold works for vectors too and if 'y' is a constant, sub becomes add + define <2 x i8> @shl1_decrement_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl1_decrement_vec( -; CHECK-NEXT: [[POW2X:%.*]] = shl <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor <2 x i8> [[POW2X]], -; CHECK-NEXT: [[M:%.*]] = mul <2 x i8> [[X1]], -; CHECK-NEXT: ret <2 x i8> [[M]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[MULSHL]], +; CHECK-NEXT: ret <2 x i8> [[M1]] ; %pow2x = shl <2 x i8> , %x %x1 = xor <2 x i8> %pow2x, From 33c7ae55e729069be754f56c4d4606cdeddd377b Mon Sep 17 00:00:00 2001 From: OCHyams Date: Wed, 2 Nov 2022 13:42:52 +0000 Subject: [PATCH 024/516] [Assignment 
Tracking][1/*] Add initial docs for Assignment Tracking The Assignment Tracking debug-info feature is outlined in this RFC: https://discourse.llvm.org/t/ rfc-assignment-tracking-a-better-way-of-specifying-variable-locations-in-ir Add documentation outlining the intent and design. --- llvm/docs/AssignmentTracking.md | 233 +++++++++++++++++++++++++++++ llvm/docs/HowToUpdateDebugInfo.rst | 9 ++ llvm/docs/SourceLevelDebugging.rst | 30 ++++ 3 files changed, 272 insertions(+) create mode 100644 llvm/docs/AssignmentTracking.md diff --git a/llvm/docs/AssignmentTracking.md b/llvm/docs/AssignmentTracking.md new file mode 100644 index 0000000000000..dfb5add3d4f74 --- /dev/null +++ b/llvm/docs/AssignmentTracking.md @@ -0,0 +1,233 @@ +# Debug Info Assignment Tracking + +Assignment Tracking is an alternative technique for tracking variable location +debug info through optimisations in LLVM. It provides accurate variable +locations for assignments where a local variable (or a field of one) is the +LHS. In rare and complicated circumstances indirect assignments might be +optimized away without being tracked, but otherwise we make our best effort to +track all variable locations. + +The core idea is to track more information about source assignments in order +and preserve enough information to be able to defer decisions about whether to +use non-memory locations (register, constant) or memory locations until after +middle end optimisations have run. This is in opposition to using +`llvm.dbg.declare` and `llvm.dbg.value`, which is to make the decision for most +variables early on, which can result in suboptimal variable locations that may +be either incorrect or incomplete. + +A secondary goal of assignment tracking is to cause minimal additional work for +LLVM pass writers, and minimal disruption to LLVM in general. + +## Status and usage + +**Status**: Experimental work in progress. Enabling is strongly advised against +except for development and testing. 
+ +**Enable in Clang**: `-Xclang -fexperimental-assignment-tracking` + +**Enable in LLVM tools**: `-experimental-assignment-tracking` + +## Design and implementation + +### Assignment markers: `llvm.dbg.assign` + +`llvm.dbg.value`, a conventional debug intrinsic, marks out a position in the +IR where a variable takes a particular value. Similarly, Assignment Tracking +marks out the position of assignments with a new intrinsic called +`llvm.dbg.assign`. + +In order to know where in IR it is appropriate to use a memory location for a +variable, each assignment marker must in some way refer to the store, if any +(or multiple!), that performs the assignment. That way, the position of the +store and marker can be considered together when making that choice. Another +important benefit of referring to the store is that we can then build a two-way +mapping of stores<->markers that can be used to find markers that need to be +updated when stores are modified. + +An `llvm.dbg.assign` marker that is not linked to any instruction signals that +the store that performed the assignment has been optimised out, and therefore +the memory location will not be valid for at least some part of the program. + +Here's the `llvm.dbg.assign` signature. Each parameter is wrapped in +`MetadataAsValue`, and `Value *` type parameters are first wrapped in +`ValueAsMetadata`: + +``` +void @llvm.dbg.assign(Value *Value, + DIExpression *ValueExpression, + DILocalVariable *Variable, + DIAssignID *ID, + Value *Address, + DIExpression *AddressExpression) +``` + +The first three parameters look and behave like an `llvm.dbg.value`. `ID` is a +reference to a store (see next section). `Address` is the destination address +of the store and it is modified by `AddressExpression`. LLVM currently encodes +variable fragment information in `DIExpression`s, so as an implementation quirk +the `FragmentInfo` for `Variable` is contained within `ValueExpression` only. 
+ +The formal LLVM-IR signature is: +``` +void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) +``` + +### Instruction link: `DIAssignID` + +`DIAssignID` metadata is the mechanism that is currently used to encode the +store<->marker link. The metadata node has no operands and all instances are +`distinct`; equality is checked for by comparing addresses. + +`llvm.dbg.assign` intrinsics use a `DIAssignID` metadata node instance as an +operand. This way it refers to any store-like instruction that has the same +`DIAssignID` attachment. E.g. For this test.cpp, + +``` +int fun(int a) { + return a; +} +``` +compiled without optimisations: +``` +$ clang++ test.cpp -o test.ll -emit-llvm -S -g -O0 -Xclang -fexperimental-assignment-tracking +``` +we get: +``` +define dso_local noundef i32 @_Z3funi(i32 noundef %a) #0 !dbg !8 { +entry: + %a.addr = alloca i32, align 4, !DIAssignID !13 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(), metadata !13, metadata i32* %a.addr, metadata !DIExpression()), !dbg !15 + store i32 %a, i32* %a.addr, align 4, !DIAssignID !16 + call void @llvm.dbg.assign(metadata i32 %a, metadata !14, metadata !DIExpression(), metadata !16, metadata i32* %a.addr, metadata !DIExpression()), !dbg !15 + %0 = load i32, i32* %a.addr, align 4, !dbg !17 + ret i32 %0, !dbg !18 +} + +... +!13 = distinct !DIAssignID() +!14 = !DILocalVariable(name: "a", ...) +... +!16 = distinct !DIAssignID() +``` + +The first `llvm.dbg.assign` refers to the `alloca` through `!DIAssignID !13`, +and the second refers to the `store` through `!DIAssignID !16`. + +### Store-like instructions + +In the absence of a linked `llvm.dbg.assign`, a store to an address that is +known to be the backing storage for a variable is considered to represent an +assignment to that variable. 
+ +This gives us a safe fall-back in cases where `llvm.dbg.assign` intrinsics have +been deleted, the `DIAssignID` attachment on the store has been dropped, or the +optimiser has made a once-indirect store (not tracked with Assignment Tracking) +direct. + +### Middle-end: Considerations for pass-writers + +#### Non-debug instruction updates + +**Cloning** an instruction: nothing new to do. Cloning automatically clones a +`DIAssignID` attachment. Multiple instructions may have the same `DIAssignID` +attachment. In this case, the assignment is considered to take place in +multiple positions in the program. + +**Moving** a non-debug instruction: nothing new to do. Instructions linked to an +`llvm.dbg.assign` have their initial IR position marked by the position of the +`llvm.dbg.assign`. + +**Deleting** a non-debug instruction: nothing new to do. Simple DSE does not +require any change; it’s safe to delete an instruction with a `DIAssignID` +attachment. An `llvm.dbg.assign` that uses a `DIAssignID` that is not attached +to any instruction indicates that the memory location isn’t valid. + +**Merging** stores: In many cases no change is required as `DIAssignID` +attachments are automatically merged if `combineMetadata` is called. One way or +another, the `DIAssignID` attachments must be merged such that the new store +becomes linked to all the `llvm.dbg.assign` intrinsics that the merged stores +were linked to. This can be achieved simply by calling a helper function +`Instruction::mergeDIAssignID`. + +**Inlining** stores: As stores are inlined we generate `llvm.dbg.assign` +intrinsics and `DIAssignID` attachments as if the stores represent source +assignments, just like in the frontend. This isn’t perfect, as stores may have +been moved, modified or deleted before inlining, but it does at least keep the +information about the variable correct within the non-inlined scope.
+ +**Splitting** stores: SROA and passes that split stores treat `llvm.dbg.assign` +intrinsics similarly to `llvm.dbg.declare` intrinsics. Clone the +`llvm.dbg.assign` intrinsics linked to the store, update the FragmentInfo in +the `ValueExpression`, and give the split stores (and cloned intrinsics) new +`DIAssignID` attachments each. In other words, treat the split stores as +separate assignments. For partial DSE (e.g. shortening a memset), we do the +same except that `llvm.dbg.assign` for the dead fragment gets an `Undef` +`Address`. + +**Promoting** allocas and store/loads: `llvm.dbg.assign` intrinsics implicitly +describe joined values in memory locations at CFG joins, but this is not +necessarily the case after promoting (or partially promoting) the +variable. Passes that promote variables are responsible for inserting +`llvm.dbg.assign` intrinsics after the resultant PHIs generated during +promotion. `mem2reg` already has to do this (with `llvm.dbg.value`) for +`llvm.dbg.declare`s. Where a store has no linked intrinsic, the store is +assumed to represent an assignment for variables stored at the destination +address. + +#### Debug intrinsic updates + +**Moving** a debug intrinsic: avoid moving `llvm.dbg.assign` intrinsics where +possible, as they represent a source-level assignment, whose position in the +program should not be affected by optimization passes. + +**Deleting** a debug intrinsic: Nothing new to do. Just like for conventional +debug intrinsics, unless it is unreachable, it’s almost always incorrect to +delete a `llvm.dbg.assign` intrinsic. + +### Lowering `llvm.dbg.assign` to MIR + +To begin with only SelectionDAG ISel will be supported. `llvm.dbg.assign` +intrinsics are lowered to MIR `DBG_INSTR_REF` instructions. Before this happens +we need to decide where it is appropriate to use memory locations and where we +must use a non-memory location (or no location) for each variable. 
In order to +make those decisions we run a standard fixed-point dataflow analysis that makes +the choice at each instruction, iteratively joining the results for each block. + +### TODO list + +As this is an experimental work in progress, there are some items we still need +to tackle: + +* LLVM is trying to replace usage of `Undef` with `Poison`. Use `Poison` rather + than `Undef` as the sentinel to denote "unknown location" for the address. See + D133293. This will be unnecessary if the address can be removed, as described + below. + +* The system expects locals to be backed by a local alloca. This isn't always + the case - sometimes a pointer to storage is passed into a function + (e.g. sret, byval). We need to be able to handle those cases. See + llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll and + clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp for examples. + +* `trackAssignments` doesn't yet work for variables that have their + `llvm.dbg.declare` location modified by a `DIExpression`, e.g. when the + address of the variable is itself stored in an `alloca` with the + `llvm.dbg.declare` using `DIExpression(DW_OP_deref)`. See `indirectReturn` in + llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll and in + clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp for an + example. + +* In order to solve the first bullet-point we need to be able to specify that a + memory location is available without using a `DIAssignID`. This is because + the storage address is not computed by an instruction (it's an argument + value) and therefore we have nowhere to put the metadata attachment. 
To solve + this we probably need another marker intrinsic to denote "the variable's + stack home is X address" - similar to `llvm.dbg.declare` and `llvm.dbg.addr` + except that it needs to compose with `llvm.dbg.assign` intrinsics such that + the stack home address is only selected as a location for the variable when + the `llvm.dbg.assign` intrinsics agree it should be. + +* Given the above (a special "the stack home is X" intrinsic), and the fact + that we can only track assignments with fixed offsets and sizes, I think we + can probably get rid of the address and address-expression part, since it + will always be computable with the info we have. diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 904ba71b965d3..c64b5d1d0d98b 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -217,6 +217,15 @@ Deleting a MIR-level MachineInstr TODO +Rules for updating ``DIAssignID`` Attachments +============================================= + +``DIAssignID`` metadata attachments are used by Assignment Tracking, which is +currently an experimental debug mode. + +See :doc:`AssignmentTracking` for how to update them and for more info on +Assignment Tracking. + How to automatically convert tests into debug info tests ======================================================== diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index a9ce60029b448..5a81ad01fdf10 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -251,6 +251,36 @@ directly, not its address. Note that the value operand of this intrinsic may be indirect (i.e, a pointer to the source variable), provided that interpreting the complex expression derives the direct value. +``llvm.dbg.assign`` +^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: llvm + + void @llvm.dbg.assign(Value *Value, + DIExpression *ValueExpression, + DILocalVariable *Variable, + DIAssignID *ID, + Value *Address, + DIExpression *AddressExpression) + +This intrinsic marks the position in IR where a source assignment occurred. It +encodes the value of the variable. It references the store, if any, that +performs the assignment, and the destination address. + +The first three arguments are the same as for an ``llvm.dbg.value``. The fourth +argument is a ``DIAssignID`` used to reference a store. The fifth is the +destination of the store (wrapped as metadata), and the sixth is a `complex +expression <LangRef.html#diexpression>`_ that modifies it. + +The formal LLVM-IR signature is: + +.. code-block:: llvm + + void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + + +See :doc:`AssignmentTracking` for more info. + Object lifetimes and scoping ============================ From 8f6c478fbfdd018d6f05b39e9440431c40e5bcef Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 14:47:57 +0100 Subject: [PATCH 025/516] [CVP] Add test for icmp without constant operand (NFC) We should still be able to fold comparisons based on ranges in this case. 
--- .../CorrelatedValuePropagation/icmp.ll | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll index 4dc9eb34f86ce..cb3914742f9d7 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -1172,4 +1172,49 @@ if.false: ret void } +define void @non_const_range(i32 %a, i32 %b) { +; CHECK-LABEL: @non_const_range( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 11 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[B:%.*]], 21 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP1]], i1 [[CMP2]], i1 false +; CHECK-NEXT: br i1 [[AND]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A_100:%.*]] = add nuw nsw i32 [[A]], 100 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A_100]], [[B]] +; CHECK-NEXT: call void @check1(i1 [[CMP3]]) +; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[A_100]], [[B]] +; CHECK-NEXT: call void @check1(i1 [[CMP4]]) +; CHECK-NEXT: [[A_10:%.*]] = add nuw nsw i32 [[A]], 10 +; CHECK-NEXT: [[CMP5:%.*]] = icmp ne i32 [[A_10]], [[B]] +; CHECK-NEXT: call void @check1(i1 [[CMP5]]) +; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i32 [[A_10]], [[B]] +; CHECK-NEXT: call void @check1(i1 [[CMP6]]) +; CHECK-NEXT: ret void +; CHECK: else: +; CHECK-NEXT: ret void +; + %cmp1 = icmp ult i32 %a, 11 + %cmp2 = icmp ult i32 %b, 21 + %and = select i1 %cmp1, i1 %cmp2, i1 false + br i1 %and, label %if, label %else + +if: + %a.100 = add nuw nsw i32 %a, 100 + %cmp3 = icmp ne i32 %a.100, %b + call void @check1(i1 %cmp3) + %cmp4 = icmp eq i32 %a.100, %b + call void @check1(i1 %cmp4) + + %a.10 = add nuw nsw i32 %a, 10 + %cmp5 = icmp ne i32 %a.10, %b + call void @check1(i1 %cmp5) + %cmp6 = icmp eq i32 %a.10, %b + call void @check1(i1 %cmp6) + ret void + +else: + ret void +} + + attributes #4 = { noreturn } From 88ac25b357aa2ac96fd1e44cd9fd12d2f1dd189a Mon Sep 17 00:00:00 2001 From: 
John Brawn Date: Thu, 27 Oct 2022 14:14:57 +0100 Subject: [PATCH 026/516] [MachineCSE] Allow PRE of instructions that read physical registers Currently MachineCSE forbids PRE when the instruction reads a physical register. Relax this so that it's allowed when the value being read is the same as what would be read in the place the instruction would be hoisted to. This is being done in preparation for adding FPCR handling to the AArch64 backend, in order to prevent it from worsening the generated code, but for targets that already have a similar register it should improve things. This patch affects code generation in several tests. The new code looks better except for in Thumb2/LowOverheadLoops/memcall.ll where we perform PRE but the LowOverheadLoops transformation then undoes it. Also in AMDGPU/selectcc-opt.ll the CHECK makes things look worse, but actually the function as a whole is better (as a MOV is PRE'd). Differential Revision: https://reviews.llvm.org/D136675 --- llvm/lib/CodeGen/MachineCSE.cpp | 32 ++- .../GlobalISel/hip.extern.shared.array.ll | 2 - .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll | 221 +++++++++--------- .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 207 ++++++++-------- llvm/test/CodeGen/AMDGPU/selectcc-opt.ll | 1 + llvm/test/CodeGen/ARM/machine-cse-cmp.ll | 38 +++ .../CodeGen/PowerPC/machine-cse-rm-pre.mir | 173 ++++++++++++++ .../Thumb2/LowOverheadLoops/memcall.ll | 16 +- 8 files changed, 455 insertions(+), 235 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 2de5879e26b09..3a8c80cbddf68 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -145,7 +145,7 @@ namespace { DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); - bool isPRECandidate(MachineInstr *MI); + bool isPRECandidate(MachineInstr *MI, SmallSet &PhysRefs); bool ProcessBlockPRE(MachineDominatorTree *MDT, 
MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); /// Heuristics to see if it's profitable to move common computations of MBB @@ -798,7 +798,8 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { // We use stronger checks for PRE candidate rather than for CSE ones to embrace // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps // to exclude instrs created by PRE that won't be CSEed later. -bool MachineCSE::isPRECandidate(MachineInstr *MI) { +bool MachineCSE::isPRECandidate(MachineInstr *MI, + SmallSet &PhysRefs) { if (!isCSECandidate(MI) || MI->isNotDuplicable() || MI->mayLoad() || @@ -807,13 +808,14 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) { MI->getNumExplicitDefs() != 1) return false; - for (const auto &def : MI->defs()) - if (!Register::isVirtualRegister(def.getReg())) - return false; - - for (const auto &use : MI->uses()) - if (use.isReg() && !Register::isVirtualRegister(use.getReg())) - return false; + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && !Register::isVirtualRegister(MO.getReg())) { + if (MO.isDef()) + return false; + else + PhysRefs.insert(MO.getReg()); + } + } return true; } @@ -822,7 +824,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, MachineBasicBlock *MBB) { bool Changed = false; for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { - if (!isPRECandidate(&MI)) + SmallSet PhysRefs; + if (!isPRECandidate(&MI, PhysRefs)) continue; if (!PREMap.count(&MI)) { @@ -858,6 +861,15 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (MI.isConvergent() && CMBB != MBB) continue; + // If this instruction uses physical registers then we can only do PRE + // if it's using the value that is live at the place we're hoisting to. 
+ bool NonLocal; + PhysDefVector PhysDefs; + if (!PhysRefs.empty() && + !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs, + PhysDefs, NonLocal)) + continue; + assert(MI.getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); Register VReg = MI.getOperand(0).getReg(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll index a6a3237ee929f..d6c675a636e9a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll @@ -22,8 +22,6 @@ define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) { } ; CHECK-LABEL: {{^}}dynamic_shared_array_1: -; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} -; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}} ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]] define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index 346b7d2deb18b..4ddd0c6583104 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -191,131 +191,131 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 -; CHECK-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 ; CHECK-NEXT: s_sub_u32 s4, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: 
v_madmk_f32 v1, v1, 0x4f800000, v2 ; CHECK-NEXT: s_subb_u32 s5, 0, s3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v2, v2 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; 
CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; 
CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 
v7, s0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s2, v0 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 -; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5 -; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], s1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v9 +; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, s0, v6 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], s1, v4 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v4, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v5 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v4, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v6 +; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_branch .LBB1_3 ; CHECK-NEXT: .LBB1_2: @@ -326,9 +326,8 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_cmp_lg_u32 
s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index f2ad41481eca4..3fd860ab72e39 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -188,130 +188,130 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 -; CHECK-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 ; CHECK-NEXT: s_sub_u32 s4, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 ; CHECK-NEXT: s_subb_u32 s5, 0, s3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v2, v2 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: 
v_mul_lo_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 +; 
CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; 
CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v0, 
vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s0, v5 -; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v0, vcc -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], s1, v0 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v6 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s1, v1 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CHECK-NEXT: 
v_subrev_i32_e32 v5, vcc, s2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s2, v3 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_branch .LBB1_3 ; CHECK-NEXT: .LBB1_2: @@ -322,9 +322,8 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll b/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll index 8e9409188daad..563d86daa55cb 100644 --- a/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -33,6 +33,7 @@ ENDIF: ; EG-LABEL: {{^}}test_b: ; EG: SET{{[GTEQN]+}}_DX10 +; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) ; EG-NEXT: PRED_ ; EG-NEXT: ALU clause starting define amdgpu_kernel void @test_b(i32 addrspace(1)* %out, float %in) { diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll index ab5f58c27e768..14565a7172827 100644 --- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -80,3 +80,41 @@ return: %retval.0 = phi i8* [ 
%add.ptr, %if.end ], [ null, %entry ] ret i8* %retval.0 } + +; The cmp of %val should not be hoisted above the preceding conditional branch +define void @f4(i32** %ptr1, i64* %ptr2, i64 %val) { +entry: +; CHECK-LABEL: f4: +; CHECK: cmp +; CHECK: movne +; CHECK: strne +; CHECK: orrs +; CHECK-NOT: subs +; CHECK-NOT: sbcs +; CHECK: beq + %tobool.not = icmp eq i32** %ptr1, null + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + store i32* null, i32** %ptr1, align 4 + br label %if.end + +if.end: +; CHECK: subs +; CHECK: sbcs +; CHECK: bxlt lr + %tobool1 = icmp ne i64 %val, 0 + %cmp = icmp slt i64 %val, 10 + %or.cond = and i1 %tobool1, %cmp + br i1 %or.cond, label %cleanup, label %if.end3 + +if.end3: +; CHECK: subs +; CHECK: sbc + %sub = add nsw i64 %val, -10 + store i64 %sub, i64* %ptr2, align 8 + br label %cleanup + +cleanup: + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir b/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir new file mode 100644 index 0000000000000..36484be012362 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir @@ -0,0 +1,173 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=powerpc-unknown-unknown -run-pass=machine-cse -verify-machineinstrs | FileCheck %s +--- | + define void @can_pre() { + entry: + br label %for.body + + for.body: + br i1 undef, label %if.then, label %if.else + + if.then: + br label %if.end + + if.else: + br label %if.end + + if.end: + br label %for.body + } + + define void @cannot_pre() { + entry: + br label %for.body + + for.body: + br i1 undef, label %if.then, label %if.else + + if.then: + br label %if.end + + if.else: + br label %if.end + + if.end: + br label %for.body + } +... 
+--- +name: can_pre +registers: + - { id: 0, class: f8rc, preferred-register: '' } + - { id: 1, class: f8rc, preferred-register: '' } + - { id: 2, class: gprc, preferred-register: '' } + - { id: 3, class: gprc, preferred-register: '' } + - { id: 4, class: f8rc, preferred-register: '' } + - { id: 5, class: f8rc, preferred-register: '' } +liveins: + - { reg: '$r1', virtual-reg: '%2' } + - { reg: '$r2', virtual-reg: '%3' } + - { reg: '$f1', virtual-reg: '%4' } + - { reg: '$f2', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: can_pre + ; CHECK: bb.0.for.body: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $r1, $r2, $f1, $f2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:f8rc = COPY $f2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:f8rc = COPY $f1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc = COPY $r2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY $r1 + ; CHECK-NEXT: $cr0 = CMPLWI [[COPY3]], 0 + ; CHECK-NEXT: %6:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: BCC 44, $cr0, %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: BLR implicit $lr, implicit $rm + bb.0.for.body: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r1, $r2, $f1, $f2 + + %5:f8rc = COPY $f2 + %4:f8rc = COPY $f1 + %3:gprc = COPY $r2 + %2:gprc = COPY $r1 + $cr0 = CMPLWI %2, 0 + BCC 44, $cr0, %bb.1 + B %bb.2 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %0:f8rc = nofpexcept FDIV %4, %5, implicit $rm + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %1:f8rc = nofpexcept FDIV %4, %5, implicit $rm + + bb.3.if.end: + BLR implicit $lr, implicit $rm +... 
+--- +name: cannot_pre +registers: + - { id: 0, class: f8rc, preferred-register: '' } + - { id: 1, class: f8rc, preferred-register: '' } + - { id: 2, class: gprc, preferred-register: '' } + - { id: 3, class: gprc, preferred-register: '' } + - { id: 4, class: f8rc, preferred-register: '' } + - { id: 5, class: f8rc, preferred-register: '' } + - { id: 6, class: f8rc, preferred-register: '' } +liveins: + - { reg: '$r1', virtual-reg: '%2' } + - { reg: '$r2', virtual-reg: '%3' } + - { reg: '$f1', virtual-reg: '%4' } + - { reg: '$f2', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: cannot_pre + ; CHECK: bb.0.for.body: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $r1, $r2, $f1, $f2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:f8rc = COPY $f2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:f8rc = COPY $f1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc = COPY $r2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY $r1 + ; CHECK-NEXT: $cr0 = CMPLWI [[COPY3]], 0 + ; CHECK-NEXT: BCC 44, $cr0, %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SETRND:%[0-9]+]]:f8rc = SETRND [[COPY2]], implicit-def $rm, implicit $rm + ; CHECK-NEXT: %0:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %1:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: BLR implicit $lr, implicit $rm + bb.0.for.body: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r1, $r2, $f1, $f2 + + %5:f8rc = COPY $f2 + %4:f8rc = COPY $f1 + %3:gprc = COPY $r2 + %2:gprc = COPY $r1 + $cr0 = CMPLWI %2, 0 + BCC 44, $cr0, %bb.1 + B %bb.2 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %6:f8rc = SETRND %3, implicit-def $rm, 
implicit $rm + %0:f8rc = nofpexcept FDIV %4, %5, implicit $rm + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %1:f8rc = nofpexcept FDIV %4, %5, implicit $rm + + bb.3.if.end: + BLR implicit $lr, implicit $rm +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll index 040e026e6a80a..f0495def81858 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll @@ -277,23 +277,23 @@ for.body: ; preds = %entry, %for.body define void @test_memset_preheader(i8* %x, i8* %y, i32 %n) { ; CHECK-LABEL: test_memset_preheader: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cbz r2, .LBB6_5 ; CHECK-NEXT: @ %bb.1: @ %prehead ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: wlstp.8 lr, r2, .LBB6_3 ; CHECK-NEXT: .LBB6_2: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vstrb.8 q0, [r12], #16 +; CHECK-NEXT: vstrb.8 q0, [r4], #16 ; CHECK-NEXT: letp lr, .LBB6_2 ; CHECK-NEXT: .LBB6_3: @ %prehead ; CHECK-NEXT: dls lr, r2 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov r3, r0 ; CHECK-NEXT: .LBB6_4: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r3, [r12], #1 -; CHECK-NEXT: strb r3, [r1], #1 +; CHECK-NEXT: ldrb r4, [r3], #1 +; CHECK-NEXT: strb r4, [r1], #1 ; CHECK-NEXT: le lr, .LBB6_4 ; CHECK-NEXT: .LBB6_5: @ %for.cond.cleanup ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -302,7 +302,7 @@ define void @test_memset_preheader(i8* %x, i8* %y, i32 %n) { ; CHECK-NEXT: vstrb.8 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB6_6 ; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: %cmp6 = icmp ne i32 %n, 0 br i1 %cmp6, label %prehead, label %for.cond.cleanup From 2d8c1597e51c39d8db1c9428d65e6ef6d6a1d5c1 Mon Sep 17 00:00:00 
2001 From: John Brawn Date: Fri, 28 Oct 2022 14:17:01 +0100 Subject: [PATCH 027/516] [MIRVRegNamer] Avoid opcode hash collision D121929 happens to cause CodeGen/MIR/AArch64/mirnamer.mir to fail due to a hash collision caused by adding two extra opcodes. The collision is only in the top 19 bits of the hashed opcode so fix this by just using the whole hash (in fixed width hex for consistency) instead of the top 5 decimal digits. Differential Revision: https://reviews.llvm.org/D137155 --- llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 6 ++- .../AArch64/mir-canon-constant-pool-hash.mir | 4 +- .../MIR/AArch64/mir-canon-jump-table.mir | 8 ++-- .../MIR/AArch64/mirCanonCopyCopyProp.mir | 2 +- .../MIR/AArch64/mirCanonIdempotent.mir | 12 +++--- llvm/test/CodeGen/MIR/AArch64/mirnamer.mir | 42 +++++++++---------- .../CodeGen/MIR/AMDGPU/mir-canon-multi.mir | 24 +++++------ .../MIR/AMDGPU/mircanon-memoperands.mir | 12 +++--- .../CodeGen/MIR/X86/mir-canon-hash-bb.mir | 6 +-- llvm/test/CodeGen/MIR/X86/mircanon-flags.mir | 18 ++++---- 10 files changed, 68 insertions(+), 66 deletions(-) diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index a2abe71a6bd7b..35c9aebc119c6 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -62,7 +62,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { /* HashConstantPoolIndices */ true, /* HashMemOperands */ true); assert(Hash && "Expected non-zero Hash"); - return std::to_string(Hash).substr(0, 5); + OS << format_hex_no_prefix(Hash, 16, true); + return OS.str(); } // Gets a hashable artifact from a given MachineOperand (ie an unsigned). 
@@ -132,7 +133,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { } auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end()); - return std::to_string(HashMI).substr(0, 5); + OS << format_hex_no_prefix(HashMI, 16, true); + return OS.str(); } unsigned VRegRenamer::createVirtualRegister(unsigned VReg) { diff --git a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir index 78ed554687fa2..46da2af6bdf04 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir @@ -14,8 +14,8 @@ constants: body: | bb.0: ; Test that we no longer have hash collisions between two different consts: - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:gpr64common = ADR - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:gpr64common = ADR + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:gpr64common = ADR + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:gpr64common = ADR %vreg0:gpr64common = ADRP target-flags(aarch64-page) %const.0 %vreg1:gpr64common = ADRP target-flags(aarch64-page) %const.1 ... 
diff --git a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir index 6d3124c61db1b..a5ffd6e4ce3f9 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir @@ -21,10 +21,10 @@ body: | bb.2: bb.3: bb.7: - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.0 - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.1 - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.2 - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.3 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.0 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.1 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.2 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.3 %a:_(p0) = G_JUMP_TABLE %jump-table.0 %b:_(p0) = G_JUMP_TABLE %jump-table.1 %c:_(p0) = G_JUMP_TABLE %jump-table.2 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir index 21a7dddc98591..daf78187c4849 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir @@ -40,7 +40,7 @@ body: | %42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) - ;CHECK: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) + ;CHECK: %bb0_{{[0-9a-f]+}}__1:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) ;CHECK-NEXT: $w0 = COPY %bb0_ ;CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir index b30ca7c1c7e3c..63e28498ca532 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir @@ -1,12 +1,12 @@ # RUN: llc 
-mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s # RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -mir-vreg-namer-use-stable-hash -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s # These Idempotent instructions are sorted alphabetically (based on after the '=') -# CHECK: %bb0_{{[0-9]+}}__1:gpr64 = MOVi64imm 4617315517961601024 -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 408 -# CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = MOVi32imm 408 -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr64all = IMPLICIT_DEF -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:fpr64 = FMOVDi 20 -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:fpr64 = FMOVDi 112 +# CHECK: %bb0_{{[0-9a-f]+}}__1:gpr64 = MOVi64imm 4617315517961601024 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 408 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__2:gpr32 = MOVi32imm 408 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr64all = IMPLICIT_DEF +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fpr64 = FMOVDi 20 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fpr64 = FMOVDi 112 ... 
--- diff --git a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir index cdb2ecca60274..a3b339f07d502 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir @@ -8,9 +8,9 @@ body: | ;CHECK-LABEL: bb.0 ;CHECK-NEXT: liveins ;CHECK-NEXT: {{ $}} - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:_(p0) = COPY $d0 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:_(<4 x s32>) = COPY $q0 - ;CHECK-NEXT: G_STORE %bb0_{{[0-9]+}}__1(<4 x s32>), %bb0_{{[0-9]+}}__1(p0) :: (store (<4 x s32>)) + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:_(p0) = COPY $d0 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:_(<4 x s32>) = COPY $q0 + ;CHECK-NEXT: G_STORE %bb0_{{[0-9a-f]+}}__1(<4 x s32>), %bb0_{{[0-9a-f]+}}__1(p0) :: (store (<4 x s32>)) liveins: $q0, $d0 %1:fpr(p0) = COPY $d0 @@ -28,19 +28,19 @@ body: | bb.0: ;CHECK-LABEL: bb.0 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 1 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 2 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__3:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 3 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = nsw ADDWrr - ;CHECK-NEXT: %bb0_{{[0-9]+}}__4:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = nsw ADDWrr - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 4 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__3:gpr32 = nsw ADDWrr - ;CHECK-NEXT: %bb0_{{[0-9]+}}__5:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 5 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 1 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__2:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 2 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__3:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 3 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = nsw ADDWrr + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__4:gpr32 = LDRWui + ;CHECK-NEXT: 
%bb0_{{[0-9a-f]+}}__2:gpr32 = nsw ADDWrr + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 4 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__3:gpr32 = nsw ADDWrr + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__5:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 5 %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %1:gpr32 = MOVi32imm 1 @@ -78,11 +78,11 @@ body: | ;CHECK-LABEL: bb.0: ;CHECK-NEXT: liveins ;CHECK-NEXT: {{ $}} - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui %stack.0, 0 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = COPY %bb0_{{[0-9]+}}__1 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = COPY %bb0_{{[0-9]+}}__1 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = COPY %bb0_{{[0-9]+}}__1 - ;CHECK-NEXT: $w0 = COPY %bb0_{{[0-9]+}}__2 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = LDRWui %stack.0, 0 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = COPY %bb0_{{[0-9a-f]+}}__1 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = COPY %bb0_{{[0-9a-f]+}}__1 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__2:gpr32 = COPY %bb0_{{[0-9a-f]+}}__1 + ;CHECK-NEXT: $w0 = COPY %bb0_{{[0-9a-f]+}}__2 %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %1:gpr32 = COPY %0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir index 785cd20d31968..fb1728d9021b7 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir @@ -8,18 +8,18 @@ name: foo body: | bb.0: ; CHECK-LABEL: name: foo - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_32_xm0 = S_MOV_B32 61440 - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_32_xm0 = S_MOV_B32 0 - ; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY $vgpr0 - ; CHECK: %bb0_{{[0-9]+}}__1:sgpr_64 = COPY $sgpr0_sgpr1 - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 9, 0 - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 11, 0 - ; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY %bb0_{{[0-9]+}}__1 - ; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY 
%bb0_{{[0-9]+}}__1 - ; CHECK: %bb0_{{[0-9]+}}__2:vgpr_32 = COPY %bb0_{{[0-9]+}}__1 - ; CHECK: %bb0_{{[0-9]+}}__1:vreg_64 = REG_SEQUENCE %bb0_{{[0-9]+}}__1, %subreg.sub0, %bb0_{{[0-9]+}}__1, %subreg.sub1 - ; CHECK: %bb0_{{[0-9]+}}__1:sgpr_128 = REG_SEQUENCE %bb0_{{[0-9]+}}__1, %subreg.sub0, %bb0_{{[0-9]+}}__1, %subreg.sub1, %bb0_{{[0-9]+}}__1, %subreg.sub2, %bb0_{{[0-9]+}}__2, %subreg.sub3 - ; CHECK: BUFFER_STORE_DWORD_ADDR64 %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, 0, 0, 0, 0, 0, implicit $exec + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_32_xm0 = S_MOV_B32 61440 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_32_xm0 = S_MOV_B32 0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vgpr_32 = COPY $vgpr0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sgpr_64 = COPY $sgpr0_sgpr1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9a-f]+}}__1, 9, 0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9a-f]+}}__1, 11, 0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vgpr_32 = COPY %bb0_{{[0-9a-f]+}}__1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vgpr_32 = COPY %bb0_{{[0-9a-f]+}}__1 + ; CHECK: %bb0_{{[0-9a-f]+}}__2:vgpr_32 = COPY %bb0_{{[0-9a-f]+}}__1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vreg_64 = REG_SEQUENCE %bb0_{{[0-9a-f]+}}__1, %subreg.sub0, %bb0_{{[0-9a-f]+}}__1, %subreg.sub1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sgpr_128 = REG_SEQUENCE %bb0_{{[0-9a-f]+}}__1, %subreg.sub0, %bb0_{{[0-9a-f]+}}__1, %subreg.sub1, %bb0_{{[0-9a-f]+}}__1, %subreg.sub2, %bb0_{{[0-9a-f]+}}__2, %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_ADDR64 %bb0_{{[0-9a-f]+}}__1, %bb0_{{[0-9a-f]+}}__1, %bb0_{{[0-9a-f]+}}__1, 0, 0, 0, 0, 0, implicit $exec ; CHECK: S_ENDPGM 0 %10:sreg_32_xm0 = S_MOV_B32 61440 %11:sreg_32_xm0 = S_MOV_B32 0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir index e5d80e9c59fcd..99a905a1a7306 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir 
@@ -25,12 +25,12 @@ body: | liveins: $sgpr4_sgpr5 ; CHECK: COPY - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 = COPY $sgpr4_sgpr5 %1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) diff --git a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir index ebd29f917ffb6..32dc9e8752d8a 100644 --- a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir +++ b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir @@ -40,7 +40,7 @@ body: | G_BR %bb.2 ; CHECK: bb.1: - ; CHECK: %bb2_{{[0-9]+}}__1:_(s32) = G_CONSTANT + ; CHECK: %bb2_{{[0-9a-f]+}}__1:_(s32) = G_CONSTANT bb.1: %tmp4:_(s32) = G_CONSTANT i32 1 G_STORE %tmp4(s32), %tmp6(p0) :: (store (s32) into %ir.tmp1) @@ -48,13 +48,13 @@ body: | ; CHECK: bb.2: - ; CHECK: %bb1_{{[0-9]+}}__1:_(s32) = G_CONSTANT + ; CHECK: %bb1_{{[0-9a-f]+}}__1:_(s32) = G_CONSTANT bb.2: %tmp3:_(s32) = G_CONSTANT i32 2 G_STORE %tmp3(s32), %tmp6(p0) :: (store (s32) into %ir.tmp1) ; CHECK: bb.3: - ; CHECK: %bb3_{{[0-9]+}}__1:_(s32) = G_LOAD + ; CHECK: %bb3_{{[0-9a-f]+}}__1:_(s32) = G_LOAD bb.3: %tmp9:_(s32) = G_LOAD %tmp6(p0) :: (load (s32) from 
%ir.tmp1) $eax = COPY %tmp9(s32) diff --git a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir index bc5991ea41b5f..6b7b577f8ca54 100644 --- a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir +++ b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir @@ -12,15 +12,15 @@ body: | bb.0: ; CHECK: COPY - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = nnan VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = ninf VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = nsz VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = arcp VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = contract VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = afn VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = reassoc VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = nsz arcp contract afn reassoc VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = contract afn reassoc VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = nnan VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = ninf VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = nsz VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = arcp VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = contract VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = afn VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = reassoc VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = nsz arcp contract afn reassoc VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = contract afn reassoc VMULSSrr %0:fr32 = COPY $xmm0 %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr From f11e07416a011ced399e1ec75b3267249e5c9adb Mon Sep 17 00:00:00 2001 From: Denys Shabalin Date: Wed, 2 Nov 2022 14:04:37 +0100 Subject: [PATCH 028/516] [mlir] Use the same pipeline tuning options as clang for execution engine This change makes sure that ExecutionEngine's pass pipeline is identical to the one used by clang. Previously, SLPVectorization was not enabled, which caused differences in code generation. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D137248 --- mlir/lib/ExecutionEngine/OptUtils.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mlir/lib/ExecutionEngine/OptUtils.cpp b/mlir/lib/ExecutionEngine/OptUtils.cpp index 403e54f004e2b..893a8a490d44a 100644 --- a/mlir/lib/ExecutionEngine/OptUtils.cpp +++ b/mlir/lib/ExecutionEngine/OptUtils.cpp @@ -68,7 +68,13 @@ mlir::makeOptimizingTransformer(unsigned optLevel, unsigned sizeLevel, CGSCCAnalysisManager cgam; ModuleAnalysisManager mam; - PassBuilder pb(targetMachine); + PipelineTuningOptions tuningOptions; + tuningOptions.LoopUnrolling = true; + tuningOptions.LoopInterleaving = true; + tuningOptions.LoopVectorization = true; + tuningOptions.SLPVectorization = true; + + PassBuilder pb(targetMachine, tuningOptions); pb.registerModuleAnalyses(mam); pb.registerCGSCCAnalyses(cgam); From fcbf807b55e6c39c09574f2d0be40f58f4140fdf Mon Sep 17 00:00:00 2001 From: OCHyams Date: Wed, 2 Nov 2022 14:08:23 +0000 Subject: [PATCH 029/516] Fix Assignment Tracking docs error from 33c7ae55e729069be754f56c4d4606cdeddd377b Buildbot link: https://lab.llvm.org/buildbot/#/builders/30/builds/27812 --- llvm/docs/SourceLevelDebugging.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index 5a81ad01fdf10..35e1816b47c49 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -252,7 +252,7 @@ be indirect (i.e, a pointer to the source variable), provided that interpreting the complex expression derives the direct value. ``llvm.dbg.assign`` -^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^ .. 
code-block:: llvm From 73482b457e473df6affb124242c6ce9cf290ed5c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 1 Nov 2022 17:19:29 -0700 Subject: [PATCH 030/516] [RISCV] Fix cost of legal fixed length masked load and stores We can cost them the same way as a scalable masked load/store. By hitting the default path, we were costing them as if they were being scalarized. This is a significant overestimate. Differential Revision: https://reviews.llvm.org/D137218 --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 3 +- .../Analysis/CostModel/RISCV/masked_ldst.ll | 28 +++++++++---------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index f89db1202a674..d44f6a647caa3 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -227,7 +227,8 @@ InstructionCost RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) { - if (!isa(Src)) + if (!isLegalMaskedLoadStore(Src, Alignment) || + CostKind != TTI::TCK_RecipThroughput) return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); diff --git a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll index 779f3ca505d58..680ced82c0d64 100644 --- a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll @@ -3,23 +3,23 @@ define void @fixed() { ; CHECK-LABEL: 'fixed' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 8, <2 x i1> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 8, <4 x i1> undef, <4 x i8> undef) -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 47 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 8, <8 x i1> undef, <8 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 8, <16 x i1> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 8, <2 x i1> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 8, <4 x i1> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 8, <8 x i1> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 8, <2 x i1> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 8, <4 x i1> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 8, <2 x i1> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 8, <2 x i1> undef, <2 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 8, <4 x i1> undef, <4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 8, <8 x 
i1> undef, <8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 8, <16 x i1> undef, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 8, <2 x i1> undef, <2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 8, <4 x i1> undef, <4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 8, <8 x i1> undef, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 8, <2 x i1> undef, <2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 8, <4 x i1> undef, <4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 8, <2 x i1> undef, <2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 8, <2 x i1> undef, <2 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 8, <4 x i1> undef, <4 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 8, <8 x i1> undef, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f32 = call <2 x 
float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 8, <2 x i1> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 8, <4 x i1> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 8, <2 x i1> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 8, <4 x i1> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 8, <2 x i1> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 8, <4 x i1> undef, <4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 8, <2 x i1> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 8, <4 x i1> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half>* undef, i32 8, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; From 9dd7388668721ff1e5781e027adb01a985af46a5 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 28 Oct 2022 00:12:08 -0700 Subject: [PATCH 031/516] [libc] Add a porting guide to the docs. 
Reviewed By: jeffbailey Differential Revision: https://reviews.llvm.org/D136960 --- libc/docs/entrypoints.rst | 2 + libc/docs/index.rst | 1 + libc/docs/porting.rst | 120 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 libc/docs/porting.rst diff --git a/libc/docs/entrypoints.rst b/libc/docs/entrypoints.rst index dfc0aeca6fea4..3c24a922a2947 100644 --- a/libc/docs/entrypoints.rst +++ b/libc/docs/entrypoints.rst @@ -1,3 +1,5 @@ +.. _entrypoints: + Entrypoints in LLVM libc ------------------------ diff --git a/libc/docs/index.rst b/libc/docs/index.rst index 4a1825b5921c3..c5f39e89cb295 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -78,6 +78,7 @@ stages there is no ABI stability in any form. api_test mechanics_of_public_api source_layout + porting .. toctree:: :hidden: diff --git a/libc/docs/porting.rst b/libc/docs/porting.rst new file mode 100644 index 0000000000000..42f7fa33bc334 --- /dev/null +++ b/libc/docs/porting.rst @@ -0,0 +1,120 @@ +.. _porting: + +======================================= +Bringup on a New OS or Architecture +======================================= + +.. contents:: Table of Contents + :depth: 4 + :local: + +CI builders +=========== + +If you are contributing a port for an operating system or architecture which +is not covered by existing CI builders, you will also have to present a plan +for testing and contribute a CI builder. See +`this guide `_ for information +on how to add new builders to the +`LLVM buildbot `_. +You will either have to extend the existing +`Linux script `_ +and/or +`Windows script `_ +or add a new script for your operating system. + +An OS specific config directory +=============================== + +If you are starting to bring up LLVM's libc on a new operating system, the first +step is to add a directory for that OS in the ``libc/config`` directory. 
Both +`Linux `_ and +`Windows `_, +the two operating systems on which LLVM's libc is being actively developed, +have their own config directory. + +.. note:: Windows development is not as active as the development on Linux. + There is a + `Darwin `_ + config also which is in a similar state as Windows. + +.. note:: LLVM's libc is being brought up on the + `Fuchsia `_ operating system also. However, there is no + config directory for Fuchsia as the bring up is being done in the Fuchsia + source tree. + +The api.td file +--------------- + +If the :ref:`fullbuild_mode` is to be supported on the new operating system, +then a file named ``api.td`` should be added in its config directory. It is +written in the +`LLVM tablegen language `_. +It lists all the relevant macros and type definitions we want in the +public libc header files. See the existing Linux +`api.td `_ +file as an example to prepare the ``api.td`` file for the new operating system. + +.. note:: In future, LLVM tablegen will be replaced with a different DSL to list + config information. + +Architecture Subdirectory +========================= + +There are parts of the libc which are implemented differently for different +architectures. The simplest example of this is the ``syscall`` function and +its internal implementation - its Linux implementation differs for different +architectures. Since a large part of the libc makes use of syscalls (or an +equivalent on non-Linux like platforms), it might be simpler and convenient to +bring up the libc for one architecture at a time. In such cases, wherein the +support surface of LLVM's libc differs for each target architecture, one will +have to add a subdirectory (within the config directory of the operating +system) for each target architecture, and list the relevant config information +separately in those subdirectories. For example, for Linux, the x86_64 and +aarch64 configs are in separate directories, named +`x86_64 `_ +and `aarch64 `_. 
+The libc CMake machinery looks for subdirectories named after the target +architecture. + +The entrypoints.txt file +======================== + +One of the important pieces of config information is listed in a file named +``entrypoints.txt``. This file lists the targets for the entrypoints (see +:ref:`entrypoints`) you want to include in the static archive of the libc (for +the :ref:`overlay_mode` and/or the :ref:`fullbuild_mode`.) If you are doing an +architecture specific bring up, then an ``entrypoints.txt`` file should be +created in the architecture subdirectory for each architecture. Else, having a +single ``entrypoints.txt`` in the operating system directory is sufficient. + +The Linux config has an ``entrypoints.txt`` for each individual target +architecture separately: `aarch64 `_, +`arm32 `_ and +`x86_64 `_. On the +other hand, the Windows config has a single ``entrypoints.txt`` +`file `_. + +A typical bring up procedure will normally bring up a small group of entrypoints +at a time. The usual practice is to progressively add the targets for those +entrypoints to the ``entrypoints.txt`` file as they are being brought up. The +same is the case if one is implementing a new entrypoint - the target for the +new entrypoint should be added to the relevant ``entrypoints.txt`` file. If +the implementation of the new entrypoint supports multiple operating systems and +target architectures, then multiple ``entrypoints.txt`` files will have to be +updated. + +The headers.txt file +==================== + +Another important piece of config information is listed in a file named +``headers.txt``. It lists the targets for the set of public headers that are +provided by the libc. This is relevant only if the libc is to be used in the +:ref:`fullbuild_mode` on the target operating system and architecture. 
As with +the ``entrypoints.txt`` file, one ``headers.txt`` file should be listed for +each individual target architecture if you are doing an architecture specific +bring up. The Linux config has a ``headers.txt`` file listed separately for the +`aarch64 `_ +config and the +`x86_64 `_ +config. From a2a87ee7e9cdd329f2a6d35fe9a103b81d3153f6 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 28 Oct 2022 01:09:38 -0700 Subject: [PATCH 032/516] [libc] Add a contributing guide to the docs. Reviewed By: jeffbailey Differential Revision: https://reviews.llvm.org/D136961 --- libc/docs/clang_tidy_checks.rst | 2 ++ libc/docs/contributing.rst | 56 +++++++++++++++++++++++++++++++++ libc/docs/index.rst | 1 + libc/docs/math.rst | 2 ++ 4 files changed, 61 insertions(+) create mode 100644 libc/docs/contributing.rst diff --git a/libc/docs/clang_tidy_checks.rst b/libc/docs/clang_tidy_checks.rst index 01480672c465b..b0e72cd011d4b 100644 --- a/libc/docs/clang_tidy_checks.rst +++ b/libc/docs/clang_tidy_checks.rst @@ -1,3 +1,5 @@ +.. _clangtidy_rules: + LLVM libc clang-tidy checks =========================== These are the clang-tidy checks designed to help enforce implementation diff --git a/libc/docs/contributing.rst b/libc/docs/contributing.rst new file mode 100644 index 0000000000000..e17243df7bc69 --- /dev/null +++ b/libc/docs/contributing.rst @@ -0,0 +1,56 @@ +.. _contributing: + +================================ +Contributing to the libc Project +================================ + +LLVM's libc is being developed as part of the LLVM project so contributions +to the libc project should also follow the general LLVM +`contribution guidelines `_. Below is +a list of open projects that one can start with: + +#. 
**Cleanup code-style** - The libc project follows the general + `LLVM style `_ but differs in a + few aspects: We use ``snake_case`` for non-constant variable and function + names, ``CamelCase`` for internal type names (those which are not defined in a + public header), and ``CAPITALIZED_SNAKE_CASE`` for constants. When we started + working on the project, we started using the general LLVM style for + everything. However, for a short period, we switched to the style that is + currently followed by the `LLD project `_. + But, considering that we implement a lot of functions and types whose names + are prescribed by the standards, we have settled on the style described above. + However, we have not switched over to this style in all parts of the ``libc`` + directory. So, a simple but mechanical project would be to move the parts + following the old styles to the new style. + +#. **Integrating with the rest of the LLVM project** - There are two parts to + this project: + + #. One is about adding CMake facilities to optionally link the libc's overlay + static archive (see :ref:`overlay_mode`) with other LLVM tools/executables. + #. The other is about putting plumbing in place to release the overlay static + archive (see :ref:`overlay_mode`) as part of the LLVM binary releases. + +#. **Implement Linux syscall wrappers** - A large portion of the POSIX API can + be implemented as syscall wrappers on Linux. A good number have already been + implemented but many more are yet to be implemented. So, a project of medium + complexity would be to implement syscall wrappers which have not yet been + implemented. + +#. **Add a better random number generator** - The current random number + generator has a very small range. This has to be improved or switched over + to a fast random number generator with a large range. + +#. 
**Update the clang-tidy lint rules and use them in the build and/or CI** - + Currently, the :ref:`clangtidy_rules` have gone stale and are mostly unused + by the developers and on the CI builders. This project is about updating + them and reintegrating them back with the build and running them on the + CI builders. + +#. **double and higher precision math functions** - These are under active + development but you can take a shot at those not yet implemented. See + :ref:`math` for more information. + +#. **Contribute a new OS/Architecture port** - You can contribute a new + operating system or target architecture port. See :ref:`porting` for more + information. diff --git a/libc/docs/index.rst b/libc/docs/index.rst index c5f39e89cb295..7fe2b0d0278e9 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -79,6 +79,7 @@ stages there is no ABI stability in any form. mechanics_of_public_api source_layout porting + contributing .. toctree:: :hidden: diff --git a/libc/docs/math.rst b/libc/docs/math.rst index 329280bb305f7..dd965fe6629ec 100644 --- a/libc/docs/math.rst +++ b/libc/docs/math.rst @@ -1,3 +1,5 @@ +.. _math: + ============== Math Functions ============== From f7fd4f319acf354fdf0ac8d040443195d2026b6a Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 2 Nov 2022 00:34:11 -0700 Subject: [PATCH 033/516] [libc] Add links to libc Discourse and to the libc Discord channel. Reviewed By: jeffbailey Differential Revision: https://reviews.llvm.org/D137229 --- libc/docs/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libc/docs/index.rst b/libc/docs/index.rst index 7fe2b0d0278e9..c72544cdcc99b 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -88,4 +88,6 @@ stages there is no ABI stability in any form. 
Source Code Bug Reports + Discourse + Discord Channel Buildbot From 136b927c9e44867feb5b5d447843c5a588fb09b8 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Mon, 31 Oct 2022 01:25:25 -0700 Subject: [PATCH 034/516] [libc] Add a doc about the libc fullbuild mode. Reviewed By: jeffbailey Differential Revision: https://reviews.llvm.org/D137061 --- libc/docs/fullbuild_mode.rst | 78 +++++++++++++++++++++++++++++++++++- libc/examples/README.md | 1 + 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 libc/examples/README.md diff --git a/libc/docs/fullbuild_mode.rst b/libc/docs/fullbuild_mode.rst index 635dc8b66417f..a5c51aa4cb079 100644 --- a/libc/docs/fullbuild_mode.rst +++ b/libc/docs/fullbuild_mode.rst @@ -4,4 +4,80 @@ Fullbuild Mode ============== -Coming soon, stay tuned! +.. contents:: Table of Contents + :depth: 4 + :local: + +The *fullbuild* mode of LLVM's libc is the mode in which it is being used as +the only libc (as opposed to the :ref:`overlay_mode` in which it is used along +with the system libc.) Hence, to start using it that way, you will have to build +and install the ``libc.a`` static archive from LLVM's libc as well as the +start-up objects and public headers provided by it. In this document, we will +present a way to set up a *sysroot* (see the documentation of the ``--sysroot`` +option here: ``_) +which includes not only the components of LLVM's libc, but also a full LLVM-only +toolchain consisting of the `clang `_ compiler, the +`lld `_ linker and the +`compiler-rt `_ runtime libraries. LLVM's libc +is not yet complete enough to allow using and linking a C++ application against +a C++ standard library (like libc++). Hence, we do not include a C++ standard +library in the sysroot. + +.. note:: When the libc is complete enough, we should be able to include + `libc++ `_, libcxx-abi and libunwind in the + toolchain and use them to build and link C++ applications. 
+ +Building the full libc +====================== + +LLVM's libc uses `Scudo `_ +as its allocator. So, when building the full libc, we should specify that we +want Scudo to be included in the libc. Since the libc currently only supports +static linking, we also specify that we do not want a shared library for Scudo. +A typical ``cmake`` configure step will look like this: + +.. code-block:: sh + + $> cd llvm-project # The llvm-project checkout + $> mkdir build + $> cd build + $> cmake ../llvm -G Ninja \ + -DLLVM_ENABLE_PROJECTS="clang;libc;lld;compiler-rt" \ + -DCMAKE_BUILD_TYPE= \ # Select build type + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ + -DLLVM_LIBC_FULL_BUILD=ON \ # We want the full libc + -DLLVM_LIBC_INCLUDE_SCUDO=ON \ # Include Scudo in the libc + -DCOMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC=ON \ + -DCOMPILER_RT_BUILD_GWP_ASAN=OFF \ + -DCOMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED=OFF \ + -DCMAKE_INSTALL_PREFIX= # Specify a sysroot directory + +Since we want to include ``clang``, ``lld`` and ``compiler-rt`` in our +toolchain, we list them in ``LLVM_ENABLE_PROJECTS`` along with ``libc``. The +option ``CMAKE_INSTALL_PREFIX`` specifies the sysroot directory in which to +install the new toolchain. + +Installation +============ + +To build and install the libc, clang (and its support libraries and builtins), +lld and compiler-rt, run the following command after the above ``cmake`` +command: + +.. code-block:: sh + + $> ninja install-clang install-builtins install-compiler-rt \ + install-core-resource-headers install-libc install-lld + +Once the above command completes successfully, the ```` directory you +have specified with the CMake configure step above will contain a full LLVM-only +toolchain with which you can build practical/real-world C applications. See +``_ for examples +of how to start using this new toolchain. 
+ +Linux Headers +============= + +If you are using the full libc on Linux, then you will also need to install +Linux headers in your sysroot. It is left to the reader to figure out the best +way to install Linux headers on the system they want to use the full libc on. diff --git a/libc/examples/README.md b/libc/examples/README.md new file mode 100644 index 0000000000000..ecdbe33656939 --- /dev/null +++ b/libc/examples/README.md @@ -0,0 +1 @@ +Coming soon, stay tuned! From e57ab8fe91f06e71d3de2df07e6c7efd2f0c6078 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Nov 2022 08:30:21 -0700 Subject: [PATCH 035/516] llvm-reduce: Add explicit triple to test Some host targets are managing to get through MIR parsing without constructing their MachineFunctionInfo. Fixes at least SystemZ and SPARC (issue 58768) --- llvm/test/tools/llvm-reduce/file-output-type.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-reduce/file-output-type.test b/llvm/test/tools/llvm-reduce/file-output-type.test index 5535866e7251d..9a00d8a00b76d 100644 --- a/llvm/test/tools/llvm-reduce/file-output-type.test +++ b/llvm/test/tools/llvm-reduce/file-output-type.test @@ -40,7 +40,7 @@ # Make sure MIR ignores requests for bitcode -# RUN: llvm-reduce -output-bitcode --delta-passes=instructions -o %t.3 --test FileCheck --test-arg --check-prefix=MIR --test-arg %s --test-arg --input-file %p/Inputs/test-output-format.mir +# RUN: llvm-reduce -mtriple=x86_64-- -output-bitcode --delta-passes=instructions -o %t.3 --test FileCheck --test-arg --check-prefix=MIR --test-arg %s --test-arg --input-file %p/Inputs/test-output-format.mir # RUN: llc -x mir -run-pass=none -o /dev/null %t.3 From 943b4c90a9644dbceea231a86d59520dd3c2a6db Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 2 Nov 2022 16:52:05 +0100 Subject: [PATCH 036/516] [flang] Handle dispatch on nopass procedure from array element When a fir.dispatch is done with a NOPASS type-bound procedure, the 
object in fir.dispatch is just used to gather the vtable. Therefore, no transformation is done on it and the original entity is used. The current code generation didn't expect the entity to be an array. This patch update the code generation to be able to retrieve the vtable accordingly. Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D137255 --- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 17 +++++++++++------ flang/test/Fir/dispatch.f90 | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index fae601b84671d..be49a0bf509be 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -915,12 +915,17 @@ struct DispatchOpConversion : public FIROpConversion { return emitError(loc) << "no binding tables found"; // Get derived type information. - auto declaredType = llvm::TypeSwitch( - dispatch.getObject().getType().getEleTy()) - .Case( - [](auto p) { return p.getEleTy(); }) - .Default([](mlir::Type t) { return t; }); - + auto declaredType = + llvm::TypeSwitch( + dispatch.getObject().getType().getEleTy()) + .Case( + [](auto p) { + if (auto seq = + p.getEleTy().template dyn_cast()) + return seq.getEleTy(); + return p.getEleTy(); + }) + .Default([](mlir::Type t) { return t; }); assert(declaredType.isa() && "expecting fir.type"); auto recordType = declaredType.dyn_cast(); std::string typeDescName = diff --git a/flang/test/Fir/dispatch.f90 b/flang/test/Fir/dispatch.f90 index e3e75f20ea45d..7b462df81c52f 100644 --- a/flang/test/Fir/dispatch.f90 +++ b/flang/test/Fir/dispatch.f90 @@ -120,6 +120,21 @@ subroutine display_class(p) call p%proc_pass(1) end subroutine + subroutine no_pass_array(a) + class(p1) :: a(:) + call a(1)%proc_nopass() + end subroutine + + subroutine no_pass_array_allocatable(a) + class(p1), allocatable :: a(:) + call a(1)%proc_nopass() + end subroutine + + subroutine 
no_pass_array_pointer(a) + class(p1), allocatable :: a(:) + call a(1)%proc_nopass() + end subroutine + end module program test_type_to_class @@ -232,6 +247,9 @@ program test_type_to_class ! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr ! CHECK: call void %[[FUNC_PTR]](ptr %[[INT32]], ptr %[[CLASS]]) +! CHECK-LABEL: _QMdispatch1Pno_pass_array +! CHECK-LABEL: _QMdispatch1Pno_pass_array_allocatable +! CHECK-LABEL: _QMdispatch1Pno_pass_array_pointer ! Check the layout of the binding table. This is easier to do in FIR than in ! LLVM IR. From 59a41809d85ed0e9e073f6b425cc915f6c231e10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Sala=20Penad=C3=A9s?= Date: Wed, 2 Nov 2022 12:03:34 -0400 Subject: [PATCH 037/516] [OpenMP][libomptarget] Fix AsyncInfoTy object in omp_target_memcpy The AsyncInfoTy should be created in the same device as the async operation will be issued. In omp_target_memcpy, the AsyncInfoTy for the host to destination device transfer was created referring to the source device. 
Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D137225 --- openmp/libomptarget/src/api.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index d65ca0b02864d..f408449f01134 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -190,7 +190,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, Rc = SrcDev.retrieveData(Buffer, SrcAddr, Length, AsyncInfo); } if (Rc == OFFLOAD_SUCCESS) { - AsyncInfoTy AsyncInfo(SrcDev); + AsyncInfoTy AsyncInfo(DstDev); Rc = DstDev.submitData(DstAddr, Buffer, Length, AsyncInfo); } free(Buffer); From 1c0f541a4d1d4eebda81838c717b5b58525e6c7f Mon Sep 17 00:00:00 2001 From: rkayaith Date: Tue, 11 Oct 2022 17:29:39 -0400 Subject: [PATCH 038/516] [mlir] Don't mix -pass-pipeline with other pass options These are test updates required for D135745, which disallows mixing `-pass-pipeline` and the individual `-pass-name` options. 
Reviewed By: rriddle, mehdi_amini Differential Revision: https://reviews.llvm.org/D135746 --- .../VectorToGPU/vector-to-mma-ops.mlir | 2 +- mlir/test/Dialect/GPU/promotion.mlir | 2 +- .../GPU/CUDA/test-reduction-distribute.mlir | 6 +++--- .../Vector/GPU/CUDA/test-warp-distribute.mlir | 18 +++++++++--------- .../GPU/CUDA/TensorCore/wmma-matmul-f16.mlir | 6 +++--- .../GPU/CUDA/TensorCore/wmma-matmul-f32.mlir | 6 +++--- .../Integration/GPU/CUDA/all-reduce-and.mlir | 6 +++--- .../Integration/GPU/CUDA/all-reduce-max.mlir | 6 +++--- .../Integration/GPU/CUDA/all-reduce-min.mlir | 6 +++--- .../Integration/GPU/CUDA/all-reduce-op.mlir | 6 +++--- .../Integration/GPU/CUDA/all-reduce-or.mlir | 6 +++--- .../GPU/CUDA/all-reduce-region.mlir | 6 +++--- .../Integration/GPU/CUDA/all-reduce-xor.mlir | 6 +++--- mlir/test/Integration/GPU/CUDA/async.mlir | 10 +++++----- .../Integration/GPU/CUDA/gpu-to-cubin.mlir | 6 +++--- .../GPU/CUDA/multiple-all-reduce.mlir | 6 +++--- mlir/test/Integration/GPU/CUDA/shuffle.mlir | 6 +++--- .../test/Integration/GPU/CUDA/two-modules.mlir | 6 +++--- .../Integration/GPU/ROCM/gpu-to-hsaco.mlir | 6 +++--- mlir/test/Integration/GPU/ROCM/printf.mlir | 4 ++-- .../test/Integration/GPU/ROCM/two-modules.mlir | 6 +++--- mlir/test/Integration/GPU/ROCM/vecadd.mlir | 8 ++++---- .../GPU/ROCM/vector-transferops.mlir | 8 ++++---- mlir/test/Pass/pipeline-parsing.mlir | 2 +- .../mlir-cpu-runner/bare-ptr-call-conv.mlir | 2 +- 25 files changed, 76 insertions(+), 76 deletions(-) diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir index b5d08e0817100..5da76a95dc2a1 100644 --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-gpu)" -canonicalize | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-gpu),canonicalize" | FileCheck 
%s #map0 = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1, d2) -> (d0, d2)> diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir index bf22add0a685b..3d603ea38677b 100644 --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -test-gpu-memory-promotion -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s gpu.module @foo { diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir index a34838b0ef0a0..5c54272a93c91 100644 --- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" -canonicalize |\ // RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\ // RUN: mlir-opt -lower-affine -convert-scf-to-cf -convert-vector-to-llvm \ -// RUN: -convert-arith-to-llvm -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -convert-arith-to-llvm -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext 
\ diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir index 4eedd2a085aea..0740c97d45857 100644 --- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir @@ -2,9 +2,9 @@ // everything on the same thread. // RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ @@ -15,9 +15,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write" \ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main 
-entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ @@ -27,9 +27,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" \ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir index 5c20cb3b3c961..06896b5bfa0ed 100644 --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ -// RUN: --convert-scf-to-cf -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ +// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: 
--shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir index 98ba4fa90f89e..b690d50e2d6fc 100644 --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ -// RUN: --convert-scf-to-cf -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ +// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir index 03acf3dc16e96..f8461a5dddd1d 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir index d39d23ad28b96..b1f76c75593a0 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir +++ 
b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir index 473fe0725f7c0..41b373d8461aa 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir index 58ad00fd3d0c5..1727668423acc 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt 
-gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir index ef1a36da777c4..ff9b20ab40a03 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir index 118ae800c08ba..b7dbb95ed6e86 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir index 4e667503693d0..7c6d494bcf9a6 100644 --- 
a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir index e57801ee1b912..c09ab7a9819a8 100644 --- a/mlir/test/Integration/GPU/CUDA/async.mlir +++ b/mlir/test/Integration/GPU/CUDA/async.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-async-region -gpu-to-llvm \ -// RUN: -async-to-async-runtime -async-runtime-ref-counting \ -// RUN: -convert-async-to-llvm -convert-func-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-async-region -gpu-to-llvm \ +// RUN: | mlir-opt -async-to-async-runtime -async-runtime-ref-counting \ +// RUN: | mlir-opt -convert-async-to-llvm -convert-func-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_async_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir index fc2540046e9f6..e1ad2f61185b1 100644 --- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir +++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s 
\ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir index 137465ff93909..bb2c8439a1db2 100644 --- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir index 94392cf89264f..a72ee8b9214e5 100644 --- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: 
--shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir index 3d25e7845126b..b30f073307d8b 100644 --- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir index 2372752219bae..1cc7db41260f6 100644 --- a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir +++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir index b4b7eb66d818d..29c3c63c91292 100644 --- a/mlir/test/Integration/GPU/ROCM/printf.mlir +++ b/mlir/test/Integration/GPU/ROCM/printf.mlir @@ -1,6 +1,6 @@ // RUN: 
mlir-opt %s \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir index 3a3d1862adead..196b412fe088e 100644 --- a/mlir/test/Integration/GPU/ROCM/two-modules.mlir +++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir index d2857919c8966..9041441e10958 100644 --- a/mlir/test/Integration/GPU/ROCM/vecadd.mlir +++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s \ -// RUN: -convert-scf-to-cf \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm=use-bare-pointers-for-kernels=true \ +// RUN: | mlir-opt -convert-scf-to-cf \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt 
-pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm=use-bare-pointers-for-kernels=true \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir index 4d2e55e0920f7..ca7da5d730e68 100644 --- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir +++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s \ -// RUN: -convert-scf-to-cf \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -convert-scf-to-cf \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir index 77519e1ed1897..0ca4897988ee1 100644 --- a/mlir/test/Pass/pipeline-parsing.mlir +++ b/mlir/test/Pass/pipeline-parsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass)' -pass-pipeline="func.func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s +// RUN: mlir-opt %s -mlir-disable-threading 
-pass-pipeline='builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass),func.func(cse,canonicalize)' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s // RUN: mlir-opt %s -mlir-disable-threading -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK // RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass),any(test-interface-pass),any(test-interface-pass),func.func(test-function-pass),any(canonicalize),func.func(cse)' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=GENERIC_MERGE_CHECK // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s diff --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir index 016adcf43d732..55a4ba6a19490 100644 --- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s // Verify bare pointer memref calling convention. 
`simple_add1_add2_test` // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second From 0c36a1569a066e2a5fcbaa2835c7b3ea49f60458 Mon Sep 17 00:00:00 2001 From: rkayaith Date: Tue, 11 Oct 2022 15:23:48 -0400 Subject: [PATCH 039/516] [mlir][Pass] Disallow mixing -pass-pipeline with other pass options Currently `-pass-pipeline` can be specified multiple times and mixed with the individual `-pass-name` options. Removing this feature will allow for including the pipeline anchor as part of the option argument (see D134900). Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D135745 --- mlir/include/mlir/Pass/PassRegistry.h | 5 +- mlir/lib/Pass/PassRegistry.cpp | 68 ++++++++++++--------------- mlir/test/Pass/pipeline-parsing.mlir | 3 ++ 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/mlir/include/mlir/Pass/PassRegistry.h b/mlir/include/mlir/Pass/PassRegistry.h index 4f261e533ad15..97692262acc8e 100644 --- a/mlir/include/mlir/Pass/PassRegistry.h +++ b/mlir/include/mlir/Pass/PassRegistry.h @@ -231,7 +231,8 @@ struct PassPipelineCLParserImpl; /// options for each of the passes and pipelines that have been registered with /// the pass registry; Meaning that `-cse` will refer to the CSE pass in MLIR. /// It also registers an argument, `pass-pipeline`, that supports parsing a -/// textual description of a pipeline. +/// textual description of a pipeline. This option is mutually exclusive with +/// the individual pass options. class PassPipelineCLParser { public: /// Construct a pass pipeline parser with the given command line description. 
@@ -254,6 +255,8 @@ class PassPipelineCLParser { private: std::unique_ptr impl; + + llvm::cl::opt passPipeline; }; /// This class implements a command-line parser specifically for MLIR pass diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index 423c97c7a6466..31b41153874a8 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -12,6 +12,7 @@ #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" @@ -532,6 +533,13 @@ LogicalResult TextualPipeline::initialize(StringRef text, LogicalResult TextualPipeline::addToPipeline( OpPassManager &pm, function_ref errorHandler) const { + // Temporarily disable implicit nesting while we append to the pipeline. We + // want the created pipeline to exactly match the parsed text pipeline, so + // it's preferable to just error out if implicit nesting would be required. + OpPassManager::Nesting nesting = pm.getNesting(); + pm.setNesting(OpPassManager::Nesting::Explicit); + auto restore = llvm::make_scope_exit([&]() { pm.setNesting(nesting); }); + return addToPipeline(pipeline, pm, errorHandler); } @@ -730,10 +738,6 @@ struct PassArgData { /// This field is set when instance specific pass options have been provided /// on the command line. StringRef options; - - /// This field is used when the parsed option corresponds to an explicit - /// pipeline. - TextualPipeline pipeline; }; } // namespace @@ -775,9 +779,8 @@ struct PassNameParser : public llvm::cl::parser { PassArgData &value); /// If true, this parser only parses entries that correspond to a concrete - /// pass registry entry, and does not add a `pass-pipeline` argument, does not - /// include the options for pass entries, and does not include pass pipelines - /// entries. 
+ /// pass registry entry, and does not include pipeline entries or the options + /// for pass entries. bool passNamesOnly = false; }; } // namespace @@ -785,12 +788,6 @@ struct PassNameParser : public llvm::cl::parser { void PassNameParser::initialize() { llvm::cl::parser::initialize(); - /// Add an entry for the textual pass pipeline option. - if (!passNamesOnly) { - addLiteralOption(passPipelineArg, PassArgData(), - "A textual description of a pass pipeline to run"); - } - /// Add the pass entries. for (const auto &kv : *passRegistry) { addLiteralOption(kv.second.getPassArgument(), &kv.second, @@ -823,11 +820,6 @@ void PassNameParser::printOptionInfo(const llvm::cl::Option &opt, llvm::outs() << " " << opt.HelpStr << '\n'; } - // Print the top-level pipeline argument. - printOptionHelp(passPipelineArg, - "A textual description of a pass pipeline to run", - /*indent=*/4, globalWidth, /*isTopLevel=*/!opt.hasArgStr()); - // Functor used to print the ordered entries of a registration map. auto printOrderedEntries = [&](StringRef header, auto &map) { llvm::SmallVector orderedEntries; @@ -865,11 +857,6 @@ size_t PassNameParser::getOptionWidth(const llvm::cl::Option &opt) const { bool PassNameParser::parse(llvm::cl::Option &opt, StringRef argName, StringRef arg, PassArgData &value) { - // Handle the pipeline option explicitly. - if (argName == passPipelineArg) - return failed(value.pipeline.initialize(arg, llvm::errs())); - - // Otherwise, default to the base for handling. if (llvm::cl::parser::parse(opt, argName, arg, value)) return true; value.options = arg; @@ -907,12 +894,16 @@ struct PassPipelineCLParserImpl { /// Construct a pass pipeline parser with the given command line description. 
PassPipelineCLParser::PassPipelineCLParser(StringRef arg, StringRef description) : impl(std::make_unique( - arg, description, /*passNamesOnly=*/false)) {} + arg, description, /*passNamesOnly=*/false)), + passPipeline( + StringRef(passPipelineArg), + llvm::cl::desc("Textual description of the pass pipeline to run")) {} PassPipelineCLParser::~PassPipelineCLParser() = default; /// Returns true if this parser contains any valid options to add. bool PassPipelineCLParser::hasAnyOccurrences() const { - return impl->passList.getNumOccurrences() != 0; + return passPipeline.getNumOccurrences() != 0 || + impl->passList.getNumOccurrences() != 0; } /// Returns true if the given pass registry entry was registered at the @@ -925,19 +916,22 @@ bool PassPipelineCLParser::contains(const PassRegistryEntry *entry) const { LogicalResult PassPipelineCLParser::addToPipeline( OpPassManager &pm, function_ref errorHandler) const { + if (passPipeline.getNumOccurrences()) { + if (impl->passList.getNumOccurrences()) + return errorHandler( + "'-" + passPipelineArg + + "' option can't be used with individual pass options"); + std::string errMsg; + llvm::raw_string_ostream os(errMsg); + if (failed(parsePassPipeline(passPipeline, pm, os))) + return errorHandler(errMsg); + return success(); + } + for (auto &passIt : impl->passList) { - if (passIt.registryEntry) { - if (failed(passIt.registryEntry->addToPipeline(pm, passIt.options, - errorHandler))) - return failure(); - } else { - OpPassManager::Nesting nesting = pm.getNesting(); - pm.setNesting(OpPassManager::Nesting::Explicit); - LogicalResult status = passIt.pipeline.addToPipeline(pm, errorHandler); - pm.setNesting(nesting); - if (failed(status)) - return failure(); - } + if (failed(passIt.registryEntry->addToPipeline(pm, passIt.options, + errorHandler))) + return failure(); } return success(); } diff --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir index 0ca4897988ee1..3e7ce7cb68020 100644 --- 
a/mlir/test/Pass/pipeline-parsing.mlir +++ b/mlir/test/Pass/pipeline-parsing.mlir @@ -13,6 +13,9 @@ // CHECK_ERROR_4: does not refer to a registered pass or pass pipeline // CHECK_ERROR_5: Can't add pass '{{.*}}TestModulePass' restricted to 'builtin.module' on a PassManager intended to run on 'func.func', did you intend to nest? +// RUN: not mlir-opt %s -pass-pipeline='' -cse 2>&1 | FileCheck --check-prefix=CHECK_ERROR_6 %s +// CHECK_ERROR_6: '-pass-pipeline' option can't be used with individual pass options + func.func @foo() { return } From 4b21ecf10c8a0abb977bf11edf939cc708902cd3 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 2 Nov 2022 09:30:55 -0700 Subject: [PATCH 040/516] [lldb] Update TestDump.test for gnuwin32's 'file' command output Remove the colon from the CHECK line for the output of the file command. On macOS and Linux, the file command uses a colon as the separator between the path and the file type, but gnuwin32's file command uses a semicolon. --- lldb/test/Shell/Diagnostics/TestDump.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/Shell/Diagnostics/TestDump.test b/lldb/test/Shell/Diagnostics/TestDump.test index 9bb34aafc8c3a..2adde6b86d35a 100644 --- a/lldb/test/Shell/Diagnostics/TestDump.test +++ b/lldb/test/Shell/Diagnostics/TestDump.test @@ -12,4 +12,4 @@ # RUN: %lldb -o 'diagnostics dump -d %t.nonexisting' # RUN: file %t.nonexisting | FileCheck %s -# CHECK: : directory +# CHECK: directory From 12d8e7c6ade55bba241259312e3e4bdcf6aeab81 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 2 Nov 2022 09:35:00 -0700 Subject: [PATCH 041/516] [cmake][msvc] Enable standards-conforming preprocessor Since we now only support Visual Studio 2019 16.7 and newer, we're able to use the /Zc:preprocessor flag that turns on the standards-conforming preprocessor. Among other things, it correctly expands __VA_ARGS__. This enables removal of some boilerplate in D135128. 
Reviewed By: Bigcheese, thieta Differential Revision: https://reviews.llvm.org/D135128 --- llvm/cmake/modules/HandleLLVMOptions.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 7828e8a1627f2..8be51f74a9814 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -481,6 +481,10 @@ if( MSVC ) append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + # Enable standards-conforming preprocessor. + # https://learn.microsoft.com/en-us/cpp/build/reference/zc-preprocessor + append("/Zc:preprocessor" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + # Some projects use the __cplusplus preprocessor macro to check support for # a particular version of the C++ standard. When this option is not specified # explicitly, macro's value is "199711L" that implies C++98 Standard. From 85c2d92b9b3217229ced6a0f22e7013f7a65e124 Mon Sep 17 00:00:00 2001 From: Ethan Stewart Date: Wed, 2 Nov 2022 11:37:42 -0500 Subject: [PATCH 042/516] [openmp][AMDGPU] - Correct getNumberOfBlocks calculation. This patch fixes the 6 amdgpu buildbot lit test failures introduced by https://reviews.llvm.org/D135444. 
libomptarget :: amdgcn-amd-amdhsa :: mapping/reduction_implicit_map.cpp libomptarget :: amdgcn-amd-amdhsa :: offloading/cuda_no_devices.c libomptarget :: amdgcn-amd-amdhsa :: offloading/target-teams-atomic.c libomptarget :: amdgcn-amd-amdhsa-LTO :: mapping/reduction_implicit_map.cpp libomptarget :: amdgcn-amd-amdhsa-LTO :: offloading/cuda_no_devices.c libomptarget :: amdgcn-amd-amdhsa-LTO :: offloading/target-teams-atomic.c Reviewed By: jhuber6 Differential Revision: https://reviews.llvm.org/D137261 --- openmp/libomptarget/DeviceRTL/src/Mapping.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp index 7cef92d304b66..512577c06f9eb 100644 --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -79,7 +79,9 @@ uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); } uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); } -uint32_t getNumberOfBlocks() { return __builtin_amdgcn_grid_size_x(); } +uint32_t getNumberOfBlocks() { + return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); +} uint32_t getWarpId() { return impl::getThreadIdInBlock() / mapping::getWarpSize(); From f4cd3674ea2e055a770fa993cbfd2356c38fc545 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Fri, 28 Oct 2022 23:39:47 +0000 Subject: [PATCH 043/516] [mlir][scf] refactor scf structuralOpConversion to better support 1:N type conversion This patch moves the 1:N type mapping into its own classes to allow better code reuse in D137100. 
Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D137099 --- .../Transforms/StructuralTypeConversions.cpp | 104 ++++++++++++------ 1 file changed, 71 insertions(+), 33 deletions(-) diff --git a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp index c4c219617b782..a441b6c80b75b 100644 --- a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp @@ -32,22 +32,82 @@ static void unpackUnrealizedConversionCast(Value v, unpacked.push_back(v); } -class ConvertForOpTypes : public OpConversionPattern { +// CRTP +// A base class that takes care of 1:N type conversion, which maps the converted +// op results (computed by the derived class) and materializes 1:N conversion. +template +class Structural1ToNConversionPattern : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::typeConverter; + using OpConversionPattern::OpConversionPattern; + using OpAdaptor = typename OpConversionPattern::OpAdaptor; + + // + // Derived classes should provide the following method which performs the + // actual conversion. It should return llvm::None upon conversion failure and + // return the converted operation upon success. + // + // Optional convertSourceOp(SourceOp op, OpAdaptor adaptor, + // ConversionPatternRewriter &rewriter, + // TypeRange dstTypes) const; + LogicalResult - matchAndRewrite(ForOp op, OpAdaptor adaptor, + matchAndRewrite(SourceOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - SmallVector newResultTypes; + SmallVector dstTypes; SmallVector offsets; offsets.push_back(0); // Do the type conversion and record the offsets. 
for (Type type : op.getResultTypes()) { - if (failed(typeConverter->convertTypes(type, newResultTypes))) - return rewriter.notifyMatchFailure(op, "could not convert result"); - offsets.push_back(newResultTypes.size()); + if (failed(typeConverter->convertTypes(type, dstTypes))) + return rewriter.notifyMatchFailure(op, "could not convert result type"); + offsets.push_back(dstTypes.size()); } + // Calls the actual converter implementation to convert the operation. + Optional newOp = + static_cast(this)->convertSourceOp( + op, adaptor, rewriter, dstTypes); + + if (!newOp) + return rewriter.notifyMatchFailure(op, "could not convert operation"); + + // Packs the return value. + SmallVector packedRets; + for (unsigned i = 1, e = offsets.size(); i < e; i++) { + unsigned start = offsets[i - 1], end = offsets[i]; + unsigned len = end - start; + ValueRange mappedValue = newOp->getResults().slice(start, len); + if (len != 1) { + // 1 : N type conversion. + Type origType = op.getResultTypes()[i - 1]; + Value mat = typeConverter->materializeSourceConversion( + rewriter, op.getLoc(), origType, mappedValue); + if (!mat) { + return rewriter.notifyMatchFailure( + op, "Failed to materialize 1:N type conversion"); + } + packedRets.push_back(mat); + } else { + // 1 : 1 type conversion. + packedRets.push_back(mappedValue.front()); + } + } + + rewriter.replaceOp(op, packedRets); + return success(); + } +}; + +class ConvertForOpTypes + : public Structural1ToNConversionPattern { +public: + using Structural1ToNConversionPattern::Structural1ToNConversionPattern; + + // The callback required by CRTP. + Optional convertSourceOp(ForOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter, + TypeRange dstTypes) const { // Create a empty new op and inline the regions from the old op. // // This is a little bit tricky. We have two concerns here: @@ -67,15 +127,15 @@ class ConvertForOpTypes : public OpConversionPattern { // convertRegionTypes already takes care of 1:N conversion. 
if (failed(rewriter.convertRegionTypes(&op.getLoopBody(), *typeConverter))) - return failure(); + return llvm::None; // Unpacked the iteration arguments. SmallVector flatArgs; for (Value arg : adaptor.getInitArgs()) unpackUnrealizedConversionCast(arg, flatArgs); - // We can not do clone as the number of result types after conversion might - // be different. + // We can not do clone as the number of result types after conversion + // might be different. ForOp newOp = rewriter.create(op.getLoc(), adaptor.getLowerBound(), adaptor.getUpperBound(), adaptor.getStep(), flatArgs); @@ -89,29 +149,7 @@ class ConvertForOpTypes : public OpConversionPattern { rewriter.inlineRegionBefore(op.getLoopBody(), newOp.getLoopBody(), newOp.getLoopBody().end()); - // Pack the return value. - SmallVector packedRets; - for (unsigned i = 1, e = offsets.size(); i < e; i++) { - unsigned start = offsets[i - 1], end = offsets[i]; - unsigned len = end - start; - ValueRange mappedValue = newOp.getResults().slice(start, len); - if (len != 1) { - // 1 : N type conversion. - Type origType = op.getResultTypes()[i - 1]; - Value mat = typeConverter->materializeSourceConversion( - rewriter, op.getLoc(), origType, mappedValue); - if (!mat) - return rewriter.notifyMatchFailure( - op, "Failed to materialize 1:N type conversion"); - packedRets.push_back(mat); - } else { - // 1 : 1 type conversion. 
- packedRets.push_back(mappedValue.front()); - } - } - - rewriter.replaceOp(op, packedRets); - return success(); + return newOp; } }; } // namespace From 55292e9ee152e4421b5e52e6a9247d479abdb0e1 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Wed, 2 Nov 2022 09:39:16 -0700 Subject: [PATCH 044/516] [mlir][Transform] Fix ASAN heap-use-after-free --- mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp index 9b85af35783e7..5d84b7b0a6030 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp @@ -130,7 +130,7 @@ LogicalResult transform::TransformState::updatePayloadOps( if (failed(result.checkAndReport())) return failure(); - std::swap(association, updated); + it->second = updated; return success(); } From 69112d0d2dc1445a00f2b6a623c89e9f0d20a915 Mon Sep 17 00:00:00 2001 From: rkayaith Date: Mon, 31 Oct 2022 12:50:17 -0400 Subject: [PATCH 045/516] [mlir][arith] Use declarative asm format for fastmath flags Switch to using an optional group in the assembly format instead of a custom directive. There's no change to the actual printed format. 
Reviewed By: vzakhari Differential Revision: https://reviews.llvm.org/D137087 --- .../include/mlir/Dialect/Arith/IR/ArithOps.td | 4 +-- mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 25 ------------------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index f12a1a33f6912..6ca74392f0565 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -71,7 +71,7 @@ class Arith_FloatUnaryOp traits = []> : Arguments<(ins FloatLike:$operand, DefaultValuedAttr:$fastmath)>, Results<(outs FloatLike:$result)> { - let assemblyFormat = [{ $operand custom($fastmath) + let assemblyFormat = [{ $operand (`fastmath` `` $fastmath^)? attr-dict `:` type($result) }]; } @@ -83,7 +83,7 @@ class Arith_FloatBinaryOp traits = []> : Arguments<(ins FloatLike:$lhs, FloatLike:$rhs, DefaultValuedAttr:$fastmath)>, Results<(outs FloatLike:$result)> { - let assemblyFormat = [{ $lhs `,` $rhs `` custom($fastmath) + let assemblyFormat = [{ $lhs `,` $rhs (`fastmath` `` $fastmath^)? 
attr-dict `:` type($result) }]; } diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 5693ad1c0e8d1..d1d03a549092d 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -23,31 +23,6 @@ using namespace mlir; using namespace mlir::arith; -//===----------------------------------------------------------------------===// -// Floating point op parse/print helpers -//===----------------------------------------------------------------------===// -static ParseResult parseArithFastMathAttr(OpAsmParser &parser, - Attribute &attr) { - if (succeeded( - parser.parseOptionalKeyword(FastMathFlagsAttr::getMnemonic()))) { - attr = FastMathFlagsAttr::parse(parser, Type{}); - return success(static_cast(attr)); - } else { - // No fastmath attribute mnemonic present - defer attribute creation and use - // the default value. - return success(); - } -} - -static void printArithFastMathAttr(OpAsmPrinter &printer, Operation *op, - FastMathFlagsAttr fmAttr) { - // Elide printing the fastmath attribute when fastmath=none - if (fmAttr && (fmAttr.getValue() != FastMathFlags::none)) { - printer << " " << FastMathFlagsAttr::getMnemonic(); - fmAttr.print(printer); - } -} - //===----------------------------------------------------------------------===// // Pattern helpers //===----------------------------------------------------------------------===// From 96a74c452728fd330f99394bb25dacecd9325645 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 17:50:02 +0100 Subject: [PATCH 046/516] [ValueLattice] Fix typo in condition (NFC) Fix typo pointed out by Roman Divacky. There should be no functional change, as the rest of the code will return nullptr for undef anyway. The condition is just there for clarity. 
--- llvm/lib/Analysis/ValueLattice.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/ValueLattice.cpp b/llvm/lib/Analysis/ValueLattice.cpp index aec7b9950c604..1d2177a92eb46 100644 --- a/llvm/lib/Analysis/ValueLattice.cpp +++ b/llvm/lib/Analysis/ValueLattice.cpp @@ -20,7 +20,7 @@ ValueLatticeElement::getCompare(CmpInst::Predicate Pred, Type *Ty, // TODO: Can be made more precise, but always returning undef would be // incorrect. - if (isUndef() || isUndef()) + if (isUndef() || Other.isUndef()) return nullptr; if (isConstant() && Other.isConstant()) From 1ca119728ee1566ecc53bed350cf6c8db6bc88e5 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Mon, 31 Oct 2022 16:26:32 +0000 Subject: [PATCH 047/516] [mlir][scf] support 1:N type conversion for scf.if/while/condition Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D137100 --- .../Transforms/StructuralTypeConversions.cpp | 110 ++++++++---------- .../SparseTensor/scf_1_N_conversion.mlir | 65 +++++++++++ 2 files changed, 114 insertions(+), 61 deletions(-) diff --git a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp index a441b6c80b75b..ac3d76d569228 100644 --- a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp @@ -155,44 +155,57 @@ class ConvertForOpTypes } // namespace namespace { -class ConvertIfOpTypes : public OpConversionPattern { +class ConvertIfOpTypes + : public Structural1ToNConversionPattern { public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(IfOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - // TODO: Generalize this to any type conversion, not just 1:1. 
- // - // We need to implement something more sophisticated here that tracks - // which types convert to which other types and does the appropriate - // materialization logic. - // For example, it's possible that one result type converts to 0 types and - // another to 2 types, so newResultTypes would at least be the right size - // to not crash in the llvm::zip call below, but then we would set the the - // wrong type on the SSA values! These edge cases are also why we cannot - // safely use the TypeConverter::convertTypes helper here. - SmallVector newResultTypes; - for (auto type : op.getResultTypes()) { - Type newType = typeConverter->convertType(type); - if (!newType) - return rewriter.notifyMatchFailure(op, "not a 1:1 type conversion"); - newResultTypes.push_back(newType); - } + using Structural1ToNConversionPattern::Structural1ToNConversionPattern; - // See comments in the ForOp pattern for why we clone without regions and - // then inline. - IfOp newOp = cast(rewriter.cloneWithoutRegions(*op.getOperation())); + Optional convertSourceOp(IfOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter, + TypeRange dstTypes) const { + + IfOp newOp = rewriter.create(op.getLoc(), dstTypes, + adaptor.getCondition(), true); + newOp->setAttrs(op->getAttrs()); + + // We do not need the empty blocks created by rewriter. + rewriter.eraseBlock(newOp.elseBlock()); + rewriter.eraseBlock(newOp.thenBlock()); + + // Inlines block from the original operation. rewriter.inlineRegionBefore(op.getThenRegion(), newOp.getThenRegion(), newOp.getThenRegion().end()); rewriter.inlineRegionBefore(op.getElseRegion(), newOp.getElseRegion(), newOp.getElseRegion().end()); - // Update the operands and types. 
- newOp->setOperands(adaptor.getOperands()); - for (auto t : llvm::zip(newOp.getResults(), newResultTypes)) - std::get<0>(t).setType(std::get<1>(t)); - rewriter.replaceOp(op, newOp.getResults()); - return success(); + return newOp; + } +}; +} // namespace + +namespace { +class ConvertWhileOpTypes + : public Structural1ToNConversionPattern { +public: + using Structural1ToNConversionPattern::Structural1ToNConversionPattern; + + Optional convertSourceOp(WhileOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter, + TypeRange dstTypes) const { + // Unpacked the iteration arguments. + SmallVector flatArgs; + for (Value arg : adaptor.getOperands()) + unpackUnrealizedConversionCast(arg, flatArgs); + + auto newOp = rewriter.create(op.getLoc(), dstTypes, flatArgs); + + for (auto i : {0u, 1u}) { + if (failed(rewriter.convertRegionTypes(&op.getRegion(i), *typeConverter))) + return llvm::None; + auto &dstRegion = newOp.getRegion(i); + rewriter.inlineRegionBefore(op.getRegion(i), dstRegion, dstRegion.end()); + } + return newOp; } }; } // namespace @@ -217,34 +230,6 @@ class ConvertYieldOpTypes : public OpConversionPattern { }; } // namespace -namespace { -class ConvertWhileOpTypes : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(WhileOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto *converter = getTypeConverter(); - assert(converter); - SmallVector newResultTypes; - if (failed(converter->convertTypes(op.getResultTypes(), newResultTypes))) - return failure(); - - auto newOp = rewriter.create(op.getLoc(), newResultTypes, - adaptor.getOperands()); - for (auto i : {0u, 1u}) { - auto &dstRegion = newOp.getRegion(i); - rewriter.inlineRegionBefore(op.getRegion(i), dstRegion, dstRegion.end()); - if (failed(rewriter.convertRegionTypes(&dstRegion, *converter))) - return rewriter.notifyMatchFailure(op, "could not convert body types"); - } - rewriter.replaceOp(op, 
newOp.getResults()); - return success(); - } -}; -} // namespace - namespace { class ConvertConditionOpTypes : public OpConversionPattern { public: @@ -252,8 +237,11 @@ class ConvertConditionOpTypes : public OpConversionPattern { LogicalResult matchAndRewrite(ConditionOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - rewriter.updateRootInPlace( - op, [&]() { op->setOperands(adaptor.getOperands()); }); + SmallVector unpackedYield; + for (Value operand : adaptor.getOperands()) + unpackUnrealizedConversionCast(operand, unpackedYield); + + rewriter.updateRootInPlace(op, [&]() { op->setOperands(unpackedYield); }); return success(); } }; diff --git a/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir b/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir index 334d58c623936..207e46b3d45ae 100644 --- a/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir @@ -30,3 +30,68 @@ func.func @for(%in: tensor<1024xf32, #SparseVector>, return %1 : tensor<1024xf32, #SparseVector> } + +// CHECK-LABEL: func @if( +// CHECK-SAME: %[[DIM_SIZE:.*0]]: memref<1xindex>, +// CHECK-SAME: %[[DIM_CURSOR:.*1]]: memref<1xindex>, +// CHECK-SAME: %[[MEM_SIZE:.*2]]: memref<3xindex>, +// CHECK-SAME: %[[POINTER:.*3]]: memref, +// CHECK-SAME: %[[INDICES:.*4]]: memref, +// CHECK-SAME: %[[VALUE:.*5]]: memref, +// CHECK-SAME: %[[DIM_SIZE_1:.*6]]: memref<1xindex>, +// CHECK-SAME: %[[DIM_CURSOR_1:.*7]]: memref<1xindex>, +// CHECK-SAME: %[[MEM_SIZE_1:.*8]]: memref<3xindex>, +// CHECK-SAME: %[[POINTER_1:.*9]]: memref, +// CHECK-SAME: %[[INDICES_1:.*10]]: memref, +// CHECK-SAME: %[[VALUE_1:.*11]]: memref, +// CHECK-SAME: %[[TMP_arg12:.*12]]: i1) -> +// CHECK-SAME: (memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref) { +// CHECK: %[[SV:.*]]:6 = scf.if %[[TMP_arg12]] -> (memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref) { +// CHECK: scf.yield %[[DIM_SIZE]], 
%[[DIM_CURSOR]], %[[MEM_SIZE]], %[[POINTER]], %[[INDICES]], %[[VALUE]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } else { +// CHECK: scf.yield %[[DIM_SIZE_1]], %[[DIM_CURSOR_1]], %[[MEM_SIZE_1]], %[[POINTER_1]], %[[INDICES_1]], %[[VALUE_1]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } +// CHECK: return %[[SV]]#0, %[[SV]]#1, %[[SV]]#2, %[[SV]]#3, %[[SV]]#4, %[[SV]]#5 : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +func.func @if(%t: tensor<1024xf32, #SparseVector>, + %f: tensor<1024xf32, #SparseVector>, + %c: i1) -> tensor<1024xf32, #SparseVector> { + %1 = scf.if %c -> tensor<1024xf32, #SparseVector> { + scf.yield %t : tensor<1024xf32, #SparseVector> + } else { + scf.yield %f : tensor<1024xf32, #SparseVector> + } + + return %1 : tensor<1024xf32, #SparseVector> +} + +// CHECK-LABEL: func @while( +// CHECK-SAME: %[[DIM_SIZE:.*0]]: memref<1xindex>, +// CHECK-SAME: %[[DIM_CURSOR:.*1]]: memref<1xindex>, +// CHECK-SAME: %[[MEM_SIZE:.*2]]: memref<3xindex>, +// CHECK-SAME: %[[POINTER:.*3]]: memref, +// CHECK-SAME: %[[INDICES:.*4]]: memref, +// CHECK-SAME: %[[VALUE:.*5]]: memref, +// CHECK-SAME: %[[TMP_arg6:.*6]]: i1) -> +// CHECK-SAME: (memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref) { +// CHECK: %[[SV:.*]]:6 = scf.while ( +// CHECK-SAME: %[[TMP_arg7:.*]] = %[[DIM_SIZE]], +// CHECK-SAME: %[[TMP_arg8:.*]] = %[[DIM_CURSOR]], +// CHECK-SAME: %[[TMP_arg9:.*]] = %[[MEM_SIZE]], +// CHECK-SAME: %[[TMP_arg10:.*]] = %[[POINTER]], +// CHECK-SAME: %[[TMP_arg11:.*]] = %[[INDICES]], +// CHECK-SAME: %[[TMP_arg12:.*]] = %[[VALUE]]) +// CHECK: scf.condition(%[[TMP_arg6]]) %[[TMP_arg7]], %[[TMP_arg8]], %[[TMP_arg9]], %[[TMP_arg10]], %[[TMP_arg11]], %[[TMP_arg12]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } do { +// CHECK: ^bb0(%[[TMP_arg7]]: memref<1xindex>, %[[TMP_arg8]]: memref<1xindex>, %[[TMP_arg9]]: 
memref<3xindex>, %[[TMP_arg10]]: memref, %[[TMP_arg11]]: memref, %[[TMP_arg12]]: memref): +// CHECK: scf.yield %[[TMP_arg7]], %[[TMP_arg8]], %[[TMP_arg9]], %[[TMP_arg10]], %[[TMP_arg11]], %[[TMP_arg12]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } +// CHECK: return %[[SV]]#0, %[[SV]]#1, %[[SV]]#2, %[[SV]]#3, %[[SV]]#4, %[[SV]]#5 : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +func.func @while(%arg0: tensor<1024xf32, #SparseVector>, %c: i1) -> tensor<1024xf32, #SparseVector> { + %0 = scf.while (%arg4 = %arg0) : (tensor<1024xf32, #SparseVector>) -> tensor<1024xf32, #SparseVector> { + scf.condition(%c) %arg4 : tensor<1024xf32, #SparseVector> + } do { + ^bb0(%arg7: tensor<1024xf32, #SparseVector>): + scf.yield %arg7 : tensor<1024xf32, #SparseVector> + } + return %0: tensor<1024xf32, #SparseVector> +} From c10a8473f48bca32ef5e8ab78d30e3557e66d431 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Wed, 2 Nov 2022 17:00:58 +0000 Subject: [PATCH 048/516] [Assignment Tracking][2/*] Add flags to enable Assignment Tracking The Assignment Tracking debug-info feature is outlined in this RFC: https://discourse.llvm.org/t/rfc-assignment-tracking-a-better-way-of-specifying-variable-locations-in-ir Enable in clang: -Xclang -fexperimental-assignment-tracking Enable in llvm tools: -experimental-assignment-tracking When assignment tracking is enabled in clang it will pass on the flag to enable the feature in llvm. It's undefined behaviour to read IR that contains assignment tracking metadata without specifying the feature flags. Tests will come with later patches that add assignment tracking features.
Reviewed By: jmorse Differential Revision: https://reviews.llvm.org/D132221 --- clang/include/clang/Basic/CodeGenOptions.def | 4 ++++ clang/include/clang/Driver/Options.td | 5 ++++ clang/lib/Driver/ToolChains/Clang.cpp | 25 +++++++++++--------- llvm/include/llvm/IR/DebugInfo.h | 2 ++ llvm/lib/IR/DebugInfo.cpp | 7 ++++++ 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 258ba1298f90c..183cb0c71a117 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -331,6 +331,10 @@ VALUE_CODEGENOPT(StackProbeSize , 32, 4096) ///< Overrides default stack VALUE_CODEGENOPT(WarnStackSize , 32, UINT_MAX) ///< Set via -fwarn-stack-size. CODEGENOPT(NoStackArgProbe, 1, 0) ///< Set when -mno-stack-arg-probe is used CODEGENOPT(DebugStrictDwarf, 1, 1) ///< Whether or not to use strict DWARF info. + +CODEGENOPT(EnableAssignmentTracking, 1,0) ///< Enable the Assignment Tracking + ///< debug info feature feature. + CODEGENOPT(DebugColumnInfo, 1, 0) ///< Whether or not to use column information ///< in debug info. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d660a2b886033..b2f334f9f8144 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5638,6 +5638,11 @@ def fctor_dtor_return_this : Flag<["-"], "fctor-dtor-return-this">, } // let Flags = [CC1Option, NoDriverOption] +def fexperimental_assignment_tracking : + Flag<["-"], "fexperimental-assignment-tracking">, Group, + HelpText<"Enable assignment tracking debug info">, + MarshallingInfoFlag>; + //===----------------------------------------------------------------------===// // Dependency Output Options //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 160eb1f23fba2..f87325141b7eb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6987,18 +6987,21 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option // parser. 
- // -finclude-default-header flag is for preprocessor, - // do not pass it to other cc1 commands when save-temps is enabled - if (C.getDriver().isSaveTempsEnabled() && - !isa(JA)) { - for (auto *Arg : Args.filtered(options::OPT_Xclang)) { - Arg->claim(); - if (StringRef(Arg->getValue()) != "-finclude-default-header") - CmdArgs.push_back(Arg->getValue()); + for (auto Arg : Args.filtered(options::OPT_Xclang)) { + Arg->claim(); + // -finclude-default-header flag is for preprocessor, + // do not pass it to other cc1 commands when save-temps is enabled + if (C.getDriver().isSaveTempsEnabled() && + !isa(JA)) { + if (StringRef(Arg->getValue()) == "-finclude-default-header") + continue; } - } - else { - Args.AddAllArgValues(CmdArgs, options::OPT_Xclang); + if (StringRef(Arg->getValue()) == "-fexperimental-assignment-tracking") { + // Add the llvm version of this flag too. + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-experimental-assignment-tracking"); + } + CmdArgs.push_back(Arg->getValue()); } for (const Arg *A : Args.filtered(options::OPT_mllvm)) { A->claim(); diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 730c69d0c622e..b35d447a7c891 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -159,6 +159,8 @@ class DebugInfoFinder { SmallPtrSet NodesSeen; }; +/// Return true if assignment tracking is enabled. 
+bool getEnableAssignmentTracking(); } // end namespace llvm #endif // LLVM_IR_DEBUGINFO_H diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index be09d14adf0ee..8f6d58cb90b90 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -39,6 +39,13 @@ using namespace llvm; using namespace llvm::dwarf; +static cl::opt + ExperimentalAssignmentTracking("experimental-assignment-tracking", + cl::init(false)); +bool llvm::getEnableAssignmentTracking() { + return ExperimentalAssignmentTracking; +} + /// Finds all intrinsics declaring local variables as living in the memory that /// 'V' points to. This may include a mix of dbg.declare and /// dbg.addr intrinsics. From 8acb881c19270ac487c8fc1d0041bc257d824dd5 Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Wed, 2 Nov 2022 10:11:34 -0700 Subject: [PATCH 049/516] [PGO] Add a threshold for number of critical edges in PGO For some auto-generated sources, we have a huge number of critical edges (like from switch statements). We have seen an instance of 183777 critical edges in one function. After we split the critical edges in PGO instrumentation/profile-use pass, the CFG is so large that we have compile-time issues in downstream passes (like in machine CSE and block placement). Here I add a threshold to skip PGO if the number of critical edges is too large. The threshold is large enough so that it will not affect the majority of PGO compilation. Also sync the logic for skipping instrumentation and profile-use. I think this is the correct thing to do.
Differential Revision: https://reviews.llvm.org/D137184 --- .../Instrumentation/PGOInstrumentation.cpp | 49 +++++++++++++++---- .../PGOProfile/critical-edge-threshold.ll | 27 ++++++++++ 2 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/PGOProfile/critical-edge-threshold.ll diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index d627c3eb5d803..5b5f88d78b3b2 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -302,12 +302,17 @@ static cl::opt PGOTraceFuncHash( static cl::opt PGOFunctionSizeThreshold( "pgo-function-size-threshold", cl::Hidden, - cl::desc("Do not instrument functions smaller than this threshold")); + cl::desc("Do not instrument functions smaller than this threshold.")); static cl::opt MatchMemProf( "pgo-match-memprof", cl::init(true), cl::Hidden, cl::desc("Perform matching and annotation of memprof profiles.")); +static cl::opt PGOFunctionCriticalEdgeThreshold( + "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, + cl::desc("Do not instrument functions with the number of critical edges " + " greater than this threshold.")); + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -1846,6 +1851,38 @@ static void collectComdatMembers( ComdatMembers.insert(std::make_pair(C, &GA)); } +// Don't perform PGO instrumeatnion / profile-use. +static bool skipPGO(const Function &F) { + if (F.isDeclaration()) + return true; + if (F.hasFnAttribute(llvm::Attribute::NoProfile)) + return true; + if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) + return true; + if (F.getInstructionCount() < PGOFunctionSizeThreshold) + return true; + + // If there are too many critical edges, PGO might cause + // compiler time problem. 
Skip PGO if the number of + // critical edges execeed the threshold. + unsigned NumCriticalEdges = 0; + for (auto &BB : F) { + const Instruction *TI = BB.getTerminator(); + for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { + if (isCriticalEdge(TI, I)) + NumCriticalEdges++; + } + } + if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { + LLVM_DEBUG(dbgs() << "In func " << F.getName() + << ", NumCriticalEdges=" << NumCriticalEdges + << " exceed the threshold. Skip PGO.\n"); + return true; + } + + return false; +} + static bool InstrumentAllFunctions( Module &M, function_ref LookupTLI, function_ref LookupBPI, @@ -1858,13 +1895,7 @@ static bool InstrumentAllFunctions( collectComdatMembers(M, ComdatMembers); for (auto &F : M) { - if (F.isDeclaration()) - continue; - if (F.hasFnAttribute(llvm::Attribute::NoProfile)) - continue; - if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) - continue; - if (F.getInstructionCount() < PGOFunctionSizeThreshold) + if (skipPGO(F)) continue; auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); @@ -2092,7 +2123,7 @@ static bool annotateAllFunctions( if (PGOInstrumentEntry.getNumOccurrences() > 0) InstrumentFuncEntry = PGOInstrumentEntry; for (auto &F : M) { - if (F.isDeclaration()) + if (skipPGO(F)) continue; auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); diff --git a/llvm/test/Transforms/PGOProfile/critical-edge-threshold.ll b/llvm/test/Transforms/PGOProfile/critical-edge-threshold.ll new file mode 100644 index 0000000000000..d70fbaa791057 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/critical-edge-threshold.ll @@ -0,0 +1,27 @@ +; Test the critical edge threahold +; RUN: opt < %s -passes=pgo-instr-gen -pgo-critical-edge-threshold=1 -pgo-instrument-entry=true -S | FileCheck %s + +@sum = dso_local global i32 0, align 4 + +define void @foo(i32 %a, i32 %b) { +entry: + %tobool.not = icmp eq i32 %a, 0 + br i1 %tobool.not, label %if.end4, label %if.then + +if.then: + %0 = load i32, ptr @sum, align 4 + %inc = add 
nsw i32 %0, 1 + store i32 %inc, ptr @sum, align 4 + %tobool1.not = icmp eq i32 %b, 0 + br i1 %tobool1.not, label %if.end4, label %if.then2 + +if.then2: + %inc3 = add nsw i32 %0, 2 + store i32 %inc3, ptr @sum, align 4 + br label %if.end4 + +if.end4: + ret void +} + +; CHECK-NOT: call void @llvm.instrprof.increment(ptr @__profn_foo From a8604f2254f6ec9f5e775e04a2da968e3bc998ad Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Wed, 2 Nov 2022 16:13:02 +0000 Subject: [PATCH 050/516] [ASAN] Removed special case controlling allocator constants for __aarch64__. This patch should land before D137136 to make sure that the leak sanitizer allocator works correctly. This patch is NFC without D137136. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D137265 --- compiler-rt/lib/asan/asan_allocator.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index 27d826fb613ae..0b4dbf03bb9d5 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -135,12 +135,6 @@ typedef VeryCompactSizeClassMap SizeClassMap; const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x2000000000ULL; // 128G. typedef VeryDenseSizeClassMap SizeClassMap; -# elif defined(__aarch64__) -// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA -// so no need to different values for different VMA. -const uptr kAllocatorSpace = 0x10000000000ULL; -const uptr kAllocatorSize = 0x10000000000ULL; // 3T. -typedef DefaultSizeClassMap SizeClassMap; #elif defined(__sparc__) const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x20000000000ULL; // 2T. From c061892fcdbdfe46884c54a7a7bfe6df54d1df12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 1 Nov 2022 04:23:36 +0100 Subject: [PATCH 051/516] [llvm-config] Remove --src-root option Remove the `--src-root` option from the deprecated llvm-config tool. 
None of the llvm-project projects use this option anymore. The value was only meaningful for in-tree use and usually became no longer correct once LLVM was installed -- either because it was built in a temporary directory, or installed from a binary package and built on a different system entirely. Therefore, third-party tools could not have been relying on it anyway. The LLVM_SRC_ROOT #define is left intact, as it is used to compute includedir when llvm-config is used in-source. Differential Revision: https://reviews.llvm.org/D137144 --- llvm/docs/CommandGuide/llvm-config.rst | 4 ---- llvm/tools/llvm-config/llvm-config.cpp | 3 --- 2 files changed, 7 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-config.rst b/llvm/docs/CommandGuide/llvm-config.rst index 2dddbcc53dc6a..63658d0d90452 100644 --- a/llvm/docs/CommandGuide/llvm-config.rst +++ b/llvm/docs/CommandGuide/llvm-config.rst @@ -130,10 +130,6 @@ OPTIONS Print how the provided components can be collectively linked (`shared` or `static`). -**--src-root** - - Print the source root from which LLVM was built. 
- **--system-libs** Print all the system libraries needed to link against the specified LLVM diff --git a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp index 8b28a00b26236..b1d795a0a3491 100644 --- a/llvm/tools/llvm-config/llvm-config.cpp +++ b/llvm/tools/llvm-config/llvm-config.cpp @@ -234,7 +234,6 @@ Options:\n\ --obj-root Print the object root used to build LLVM.\n\ --prefix Print the installation prefix.\n\ --shared-mode Print how the provided components can be collectively linked (`shared` or `static`).\n\ - --src-root Print the source root LLVM was built from.\n\ --system-libs System Libraries needed to link against LLVM components.\n\ --targets-built List of all targets currently built.\n\ --version Print LLVM version.\n\ @@ -592,8 +591,6 @@ int main(int argc, char **argv) { PrintSharedMode = true; } else if (Arg == "--obj-root") { OS << ActivePrefix << '\n'; - } else if (Arg == "--src-root") { - OS << LLVM_SRC_ROOT << '\n'; } else if (Arg == "--ignore-libllvm") { LinkDyLib = false; LinkMode = BuiltSharedLibs ? LinkModeShared : LinkModeAuto; From 13cd39017de07a116c8901904fd4cf7aa290a47c Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Mon, 31 Oct 2022 14:50:04 -0700 Subject: [PATCH 052/516] [lldb] Add information on type systems to statistics dump command Context: I plan on using this change primarily downstream in the apple fork of llvm to track swift module loading time. 
Reviewed By: clayborg, tschuett Differential Revision: https://reviews.llvm.org/D137191 --- lldb/include/lldb/Core/Module.h | 3 +++ lldb/include/lldb/Symbol/TypeSystem.h | 3 +++ lldb/include/lldb/Target/Statistics.h | 2 ++ lldb/source/Core/Module.cpp | 5 +++++ lldb/source/Symbol/TypeSystem.cpp | 4 ++++ lldb/source/Target/Statistics.cpp | 16 ++++++++++++++++ 6 files changed, 33 insertions(+) diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index e877a14dcda10..523e04c6e6b4c 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -29,6 +29,7 @@ #include "lldb/lldb-types.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Chrono.h" @@ -814,6 +815,8 @@ class Module : public std::enable_shared_from_this, llvm::Expected GetTypeSystemForLanguage(lldb::LanguageType language); + void ForEachTypeSystem(llvm::function_ref callback); + // Special error functions that can do printf style formatting that will // prepend the message with something appropriate for this module (like the // architecture, path and object name (if any)). 
This centralizes code so diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index fd31b130c4ffd..0da0e35a4f9ca 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" +#include "llvm/Support/JSON.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Expression/Expression.h" @@ -508,6 +509,8 @@ class TypeSystem : public PluginInterface { // meaningless type itself, instead preferring to use the dynamic type virtual bool IsMeaninglessWithoutDynamicResolution(void *type); + virtual llvm::Optional ReportStatistics(); + protected: SymbolFile *m_sym_file = nullptr; }; diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h index db6494ce7899e..4bf2f3a69c9b1 100644 --- a/lldb/include/lldb/Target/Statistics.h +++ b/lldb/include/lldb/Target/Statistics.h @@ -12,6 +12,7 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-forward.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/JSON.h" #include #include @@ -107,6 +108,7 @@ struct ModuleStats { // identifiers of these modules in the global module list. This allows us to // track down all of the stats that contribute to this module. 
std::vector symfile_modules; + llvm::StringMap type_system_stats; double symtab_parse_time = 0.0; double symtab_index_time = 0.0; double debug_parse_time = 0.0; diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index d5b4621880dcd..20bd02f101fcc 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -369,6 +369,11 @@ Module::GetTypeSystemForLanguage(LanguageType language) { return m_type_system_map.GetTypeSystemForLanguage(language, this, true); } +void Module::ForEachTypeSystem( + llvm::function_ref callback) { + m_type_system_map.ForEach(callback); +} + void Module::ParseAllDebugSymbols() { std::lock_guard guard(m_mutex); size_t num_comp_units = GetNumCompileUnits(); diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index 412373533aaba..ae5ae5cbd659a 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -178,6 +178,10 @@ TypeSystem::CreateUtilityFunction(std::string text, std::string name) { return {}; } +llvm::Optional TypeSystem::ReportStatistics() { + return llvm::None; +} + #pragma mark TypeSystemMap TypeSystemMap::TypeSystemMap() : m_mutex(), m_map() {} diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp index 0ea09743d1300..118d6c396172c 100644 --- a/lldb/source/Target/Statistics.cpp +++ b/lldb/source/Target/Statistics.cpp @@ -75,6 +75,17 @@ json::Value ModuleStats::ToJSON() const { symfile_ids.emplace_back(symfile_id); module.try_emplace("symbolFileModuleIdentifiers", std::move(symfile_ids)); } + + if (!type_system_stats.empty()) { + json::Array type_systems; + for (const auto &entry : type_system_stats) { + json::Object obj; + obj.try_emplace(entry.first().str(), entry.second); + type_systems.emplace_back(std::move(obj)); + } + module.try_emplace("typeSystemInfo", std::move(type_systems)); + } + return module; } @@ -256,6 +267,11 @@ llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger, 
debug_parse_time += module_stat.debug_parse_time; debug_index_time += module_stat.debug_index_time; debug_info_size += module_stat.debug_info_size; + module->ForEachTypeSystem([&](TypeSystem *ts) { + if (auto stats = ts->ReportStatistics()) + module_stat.type_system_stats.insert({ts->GetPluginName(), *stats}); + return true; + }); json_modules.emplace_back(module_stat.ToJSON()); } From 5b30fc23696eb9c01c1182a438709120853cf06d Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Sun, 23 Oct 2022 12:02:59 -0700 Subject: [PATCH 053/516] [opt] Add -p alias for -passes See [1] for background. Some people have complained that `opt -passes=instcombine` is a lot more typing than `opt -instcombine`. As a compromise that nobody has objected to in [1], allow `opt -p instcombine`. [1] https://discourse.llvm.org/t/rfc-legacy-opt-pass-syntax-with-new-pass-manager/65863 Reviewed By: bjope, asbirlea Differential Revision: https://reviews.llvm.org/D136616 --- llvm/docs/NewPassManager.rst | 2 ++ llvm/test/Other/new-pass-manager.ll | 3 +++ llvm/tools/opt/opt.cpp | 2 ++ 3 files changed, 7 insertions(+) diff --git a/llvm/docs/NewPassManager.rst b/llvm/docs/NewPassManager.rst index 9390afa300ca5..68a1ac1e0baa5 100644 --- a/llvm/docs/NewPassManager.rst +++ b/llvm/docs/NewPassManager.rst @@ -430,6 +430,8 @@ To use the new PM: .. code-block:: shell $ opt -passes='pass1,pass2' /tmp/a.ll -S + # -p is an alias for -passes + $ opt -p pass1,pass2 /tmp/a.ll -S The new PM typically requires explicit pass nesting. 
For example, to run a function pass, then a module pass, we need to wrap the function pass in a module diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll index f9cea34e22226..5be67a34a9174 100644 --- a/llvm/test/Other/new-pass-manager.ll +++ b/llvm/test/Other/new-pass-manager.ll @@ -8,6 +8,9 @@ ; RUN: opt -disable-output -disable-verify -verify-cfg-preserved=1 -debug-pass-manager \ ; RUN: -passes=no-op-module %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-MODULE-PASS +; RUN: opt -disable-output -disable-verify -verify-cfg-preserved=1 -debug-pass-manager \ +; RUN: -p no-op-module %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MODULE-PASS ; CHECK-MODULE-PASS: Running pass: NoOpModulePass ; RUN: opt -disable-output -disable-verify -verify-cfg-preserved=1 -debug-pass-manager \ diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 62bdd6e095014..8dac02a356b18 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -85,6 +85,8 @@ static cl::opt PassPipeline( cl::desc( "A textual description of the pass pipeline. 
To have analysis passes " "available before a certain pass, add 'require'.")); +static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline), + cl::desc("Alias for -passes")); static cl::opt PrintPasses("print-passes", cl::desc("Print available passes that can be " From 76b04c2beb7e2a067ff41db65eca5d332d579833 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 24 Oct 2022 10:45:51 -0700 Subject: [PATCH 054/516] [docs][NewPM] Move pass plugin documentation into existing new PM docs Reviewed By: awarzynski, asbirlea Differential Revision: https://reviews.llvm.org/D136626 --- llvm/docs/NewPassManager.rst | 5 +++ llvm/docs/WritingAnLLVMNewPMPass.rst | 55 ++++++++++++++++++++++++++++ llvm/docs/WritingAnLLVMPass.rst | 45 ----------------------- 3 files changed, 60 insertions(+), 45 deletions(-) diff --git a/llvm/docs/NewPassManager.rst b/llvm/docs/NewPassManager.rst index 68a1ac1e0baa5..3d0bcdeb4ccb7 100644 --- a/llvm/docs/NewPassManager.rst +++ b/llvm/docs/NewPassManager.rst @@ -179,6 +179,11 @@ sanitizer) passes to various parts of the pipeline. ``AMDGPUTargetMachine::registerPassBuilderCallbacks()`` is an example of a backend adding passes to various parts of the pipeline. +Pass plugins can also add passes into default pipelines. Different tools have +different ways of loading dynamic pass plugins. For example, ``opt +-load-pass-plugin=path/to/plugin.so`` loads a pass plugin into ``opt``. For +information on writing a pass plugin, see :doc:`WritingAnLLVMNewPMPass`. + Using Analyses ============== diff --git a/llvm/docs/WritingAnLLVMNewPMPass.rst b/llvm/docs/WritingAnLLVMNewPMPass.rst index dbedc4bb6be9b..799863c7864ec 100644 --- a/llvm/docs/WritingAnLLVMNewPMPass.rst +++ b/llvm/docs/WritingAnLLVMNewPMPass.rst @@ -232,3 +232,58 @@ function. Required passes will still be run on ``optnone`` functions. For more implementation details, see ``PassInstrumentation::runBeforePass()``. 
+ +Registering passes as plugins +----------------------------- + +LLVM provides a mechanism to register pass plugins within various tools like +``clang`` or ``opt``. A pass plugin can add passes to default optimization +pipelines or to be manually run via tools like ``opt``. For more information, +see :doc:`NewPassManager`. + +Create a CMake project at the root of the repo alongside +other projects. This project must contain the following minimal +``CMakeLists.txt``: + +.. code-block:: cmake + + add_llvm_pass_plugin(MyPassName source.cpp) + +See the definition of ``add_llvm_pass_plugin`` for more CMake details. + +The pass must provide at least one of two entry points for the new pass manager, +one for static registration and one for dynamically loaded plugins: + +- ``llvm::PassPluginLibraryInfo get##Name##PluginInfo();`` +- ``extern "C" ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() LLVM_ATTRIBUTE_WEAK;`` + +Pass plugins are compiled and linked dynamically by default. Setting +``LLVM_${NAME}_LINK_INTO_TOOLS`` to ``ON`` turns the project into a statically +linked extension. + +For an in-tree example, see ``llvm/examples/Bye/``. + +To make ``PassBuilder`` aware of statically linked pass plugins: + +.. code-block:: c++ + + // Declare plugin extension function declarations. + #define HANDLE_EXTENSION(Ext) llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); + #include "llvm/Support/Extension.def" + + ... + + // Register plugin extensions in PassBuilder. + #define HANDLE_EXTENSION(Ext) get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); + #include "llvm/Support/Extension.def" + +To make ``PassBuilder`` aware of dynamically linked pass plugins: + +.. code-block:: c++ + + // Load plugin dynamically. + auto Plugin = PassPlugin::Load(PathToPlugin); + if (!Plugin) + report_error(); + // Register plugin extensions in PassBuilder. 
+ Plugin.registerPassBuilderCallbacks(PB); diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst index b644def6cbeb5..2b2ac719ef589 100644 --- a/llvm/docs/WritingAnLLVMPass.rst +++ b/llvm/docs/WritingAnLLVMPass.rst @@ -1183,51 +1183,6 @@ implement ``releaseMemory`` to, well, release the memory allocated to maintain this internal state. This method is called after the ``run*`` method for the class, before the next call of ``run*`` in your pass. -Building pass plugins -===================== - -As an alternative to using ``PLUGIN_TOOL``, LLVM provides a mechanism to -automatically register pass plugins within ``clang``, ``opt`` and ``bugpoint``. -One first needs to create an independent project and add it to either ``tools/`` -or, using the MonoRepo layout, at the root of the repo alongside other projects. -This project must contain the following minimal ``CMakeLists.txt``: - -.. code-block:: cmake - - add_llvm_pass_plugin(Name source0.cpp) - -The pass must provide two entry points for the new pass manager, one for static -registration and one for dynamically loaded plugins: - -- ``llvm::PassPluginLibraryInfo get##Name##PluginInfo();`` -- ``extern "C" ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() LLVM_ATTRIBUTE_WEAK;`` - -Pass plugins are compiled and link dynamically by default, but it's -possible to set the following variables to change this behavior: - -- ``LLVM_${NAME}_LINK_INTO_TOOLS``, when set to ``ON``, turns the project into - a statically linked extension - - -When building a tool that uses the new pass manager, one can use the following snippet to -include statically linked pass plugins: - -.. code-block:: c++ - - // fetch the declaration - #define HANDLE_EXTENSION(Ext) llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); - #include "llvm/Support/Extension.def" - - [...] 
- - // use them, PB is an llvm::PassBuilder instance - #define HANDLE_EXTENSION(Ext) get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); - #include "llvm/Support/Extension.def" - - - - - Registering dynamically loaded passes ===================================== From 4fa328074efd7eefdbb314b8f6e9f855e443ca20 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Sun, 30 Oct 2022 09:52:20 -0700 Subject: [PATCH 055/516] [NewPM][Pipeline] Add PipelineTuningOption to set inliner threshold The legacy PM allowed you to set a custom inliner threshold via builder.Inliner = llvm::createFunctionInliningPass(inline_threshold); This allows the same thing to be done with the new PM optimization pipelines. Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D137038 --- llvm/include/llvm/Passes/PassBuilder.h | 3 +++ llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 68f07f39afa63..d7d76c30d1a6b 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -75,6 +75,9 @@ class PipelineTuningOptions { /// false. bool MergeFunctions; + /// Tuning option to override the default inliner threshold. + int InlinerThreshold; + // Experimental option to eagerly invalidate more analyses. This has the // potential to decrease max memory usage in exchange for more compile time. 
// This may affect codegen due to either passes using analyses only when diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 4f1ef22775db8..b32d53d8dfaf8 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -200,6 +200,7 @@ PipelineTuningOptions::PipelineTuningOptions() { LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; CallGraphProfile = true; MergeFunctions = EnableMergeFunctions; + InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; } @@ -719,7 +720,11 @@ static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { ModuleInlinerWrapperPass PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { - InlineParams IP = getInlineParamsFromOptLevel(Level); + InlineParams IP; + if (PTO.InlinerThreshold == -1) + IP = getInlineParamsFromOptLevel(Level); + else + IP = getInlineParams(PTO.InlinerThreshold); // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to // disable hot callsite inline (as much as possible [1]) because it makes // profile annotation in the backend inaccurate. From ffdbbd112c14bdc6d975fc87d9efebea766a6a95 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Nov 2022 10:38:29 -0700 Subject: [PATCH 056/516] AMDGPU: Directly pass Function to mayUseAGPRs This was taking the MachineFunction, but only inspecting the underlying IR. 
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 6 +++--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 99967507a200f..80ce18e55c499 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -106,7 +106,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (ST.hasGFX90AInsts() && ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && - !mayUseAGPRs(MF)) + !mayUseAGPRs(F)) MayNeedAGPRs = false; // We will select all MAI with VGPR operands. } @@ -664,8 +664,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( return false; } -bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const { - for (const BasicBlock &BB : MF.getFunction()) { +bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const { + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { const auto *CB = dyn_cast(&I); if (!CB) diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index aff679deb069d..f603244086a9c 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -991,7 +991,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // \returns true if a function has a use of AGPRs via inline asm or // has a call which may use it. - bool mayUseAGPRs(const MachineFunction &MF) const; + bool mayUseAGPRs(const Function &F) const; // \returns true if a function needs or may need AGPRs. 
bool usesAGPRs(const MachineFunction &MF) const; From e4b126cc2d33033a5538d72a88f6aa153ac8b757 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Nov 2022 10:39:50 -0700 Subject: [PATCH 057/516] llvm-reduce: Require x86 to run file output test The MIR test somewhat depends on target support. --- llvm/test/tools/llvm-reduce/file-output-type.test | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/tools/llvm-reduce/file-output-type.test b/llvm/test/tools/llvm-reduce/file-output-type.test index 9a00d8a00b76d..bbfbeb2fc158a 100644 --- a/llvm/test/tools/llvm-reduce/file-output-type.test +++ b/llvm/test/tools/llvm-reduce/file-output-type.test @@ -1,3 +1,4 @@ +# REQUIRES: x86-registered-target # RUN: rm -f reduced.ll reduced.bc # RUN: llvm-as -o test-output-format.bc %p/Inputs/test-output-format.ll From 16e1a49441c51817697138437d8db2c15bc19cb4 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 2 Nov 2022 10:57:24 -0700 Subject: [PATCH 058/516] Revert "[cmake][msvc] Enable standards-conforming preprocessor" This reverts commit 12d8e7c6ade55bba241259312e3e4bdcf6aeab81.
The Windows MLIR buildbot started failing with: C:\Program Files (x86)\Windows Kits\10\include\10.0.19041.0\um\winbase.h(9531): error C2220: the following warning is treated as an error C:\Program Files (x86)\Windows Kits\10\include\10.0.19041.0\um\winbase.h(9531): warning C5105: macro expansion producing 'defined' has undefined behavior C:\Program Files (x86)\Windows Kits\10\include\10.0.19041.0\um\winbase.h(9531): note: to simplify migration, consider the temporary use of /Wv:18 flag with the version of the compiler with which you used to build without warnings --- llvm/cmake/modules/HandleLLVMOptions.cmake | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 8be51f74a9814..7828e8a1627f2 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -481,10 +481,6 @@ if( MSVC ) append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - # Enable standards-conforming preprocessor. - # https://learn.microsoft.com/en-us/cpp/build/reference/zc-preprocessor - append("/Zc:preprocessor" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - # Some projects use the __cplusplus preprocessor macro to check support for # a particular version of the C++ standard. When this option is not specified # explicitly, macro's value is "199711L" that implies C++98 Standard. From d1fbdf5bf79219549bc1fde255186d02f646a46f Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Wed, 2 Nov 2022 12:29:42 -0400 Subject: [PATCH 059/516] [llvm-tblgen] NFC: Small code refactor in DecoderEmitter. Extracts part of populateInstruction into a separate addOneOperandFields function. 
--- llvm/utils/TableGen/DecoderEmitter.cpp | 143 +++++++++++++------------ 1 file changed, 74 insertions(+), 69 deletions(-) diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 05d81bae0a9d3..f46cb4c77f6dd 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -1906,6 +1906,69 @@ void parseVarLenInstOperand(const Record &Def, } } +static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, + std::map &TiedNames, + StringRef OpName, OperandInfo &OpInfo) { + // Some bits of the operand may be required to be 1 depending on the + // instruction's encoding. Collect those bits. + if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) + if (const BitsInit *OpBits = dyn_cast(EncodedValue->getValue())) + for (unsigned I = 0; I < OpBits->getNumBits(); ++I) + if (const BitInit *OpBit = dyn_cast(OpBits->getBit(I))) + if (OpBit->getValue()) + OpInfo.InitValue |= 1ULL << I; + + unsigned Base = ~0U; + unsigned Width = 0; + unsigned Offset = 0; + + for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) { + VarInit *Var = nullptr; + VarBitInit *BI = dyn_cast(Bits.getBit(bi)); + if (BI) + Var = dyn_cast(BI->getBitVar()); + else + Var = dyn_cast(Bits.getBit(bi)); + + if (!Var) { + if (Base != ~0U) { + OpInfo.addField(Base, Width, Offset); + Base = ~0U; + Width = 0; + Offset = 0; + } + continue; + } + + if ((Var->getName() != OpName && + Var->getName() != TiedNames[std::string(OpName)])) { + if (Base != ~0U) { + OpInfo.addField(Base, Width, Offset); + Base = ~0U; + Width = 0; + Offset = 0; + } + continue; + } + + if (Base == ~0U) { + Base = bi; + Width = 1; + Offset = BI ? 
BI->getBitNum() : 0; + } else if (BI && BI->getBitNum() != Offset + Width) { + OpInfo.addField(Base, Width, Offset); + Base = bi; + Width = 1; + Offset = BI->getBitNum(); + } else { + ++Width; + } + } + + if (Base != ~0U) + OpInfo.addField(Base, Width, Offset); +} + static unsigned populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, const CodeGenInstruction &CGI, unsigned Opc, @@ -2119,21 +2182,24 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, // For each operand, see if we can figure out where it is encoded. for (const auto &Op : InOutOperands) { + Init *OpInit = Op.first; + StringRef OpName = Op.second; + if (SupportPositionalDecoding) { - if (!NumberedInsnOperands[std::string(Op.second)].empty()) { + if (!NumberedInsnOperands[std::string(OpName)].empty()) { llvm::append_range(InsnOperands, - NumberedInsnOperands[std::string(Op.second)]); + NumberedInsnOperands[std::string(OpName)]); continue; } - if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) { + if (!NumberedInsnOperands[TiedNames[std::string(OpName)]].empty()) { if (!NumberedInsnOperandsNoTie.count( - TiedNames[std::string(Op.second)])) { + TiedNames[std::string(OpName)])) { // Figure out to which (sub)operand we're tied. 
unsigned i = - CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]); + CGI.Operands.getOperandNamed(TiedNames[std::string(OpName)]); int tiedTo = CGI.Operands[i].getTiedRegister(); if (tiedTo == -1) { - i = CGI.Operands.getOperandNamed(Op.second); + i = CGI.Operands.getOperandNamed(OpName); tiedTo = CGI.Operands[i].getTiedRegister(); } @@ -2142,7 +2208,7 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, CGI.Operands.getSubOperandNumber(tiedTo); InsnOperands.push_back( - NumberedInsnOperands[TiedNames[std::string(Op.second)]] + NumberedInsnOperands[TiedNames[std::string(OpName)]] [SO.second]); } } @@ -2154,76 +2220,15 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, // to interpret it. As a first step, require the target to provide // callbacks for decoding register classes. - Init *OpInit = Op.first; if (DagInit *Dag = dyn_cast(OpInit)) OpInit = Dag->getOperator(); OperandInfo OpInfo = getOpInfo(cast(OpInit)->getDef()); - // Some bits of the operand may be required to be 1 depending on the - // instruction's encoding. Collect those bits. 
- if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second)) - if (const BitsInit *OpBits = - dyn_cast(EncodedValue->getValue())) - for (unsigned I = 0; I < OpBits->getNumBits(); ++I) - if (const BitInit *OpBit = dyn_cast(OpBits->getBit(I))) - if (OpBit->getValue()) - OpInfo.InitValue |= 1ULL << I; - - unsigned Base = ~0U; - unsigned Width = 0; - unsigned Offset = 0; - - for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) { - VarInit *Var = nullptr; - VarBitInit *BI = dyn_cast(Bits.getBit(bi)); - if (BI) - Var = dyn_cast(BI->getBitVar()); - else - Var = dyn_cast(Bits.getBit(bi)); - - if (!Var) { - if (Base != ~0U) { - OpInfo.addField(Base, Width, Offset); - Base = ~0U; - Width = 0; - Offset = 0; - } - continue; - } - - if ((Var->getName() != Op.second && - Var->getName() != TiedNames[std::string(Op.second)])) { - if (Base != ~0U) { - OpInfo.addField(Base, Width, Offset); - Base = ~0U; - Width = 0; - Offset = 0; - } - continue; - } - - if (Base == ~0U) { - Base = bi; - Width = 1; - Offset = BI ? BI->getBitNum() : 0; - } else if (BI && BI->getBitNum() != Offset + Width) { - OpInfo.addField(Base, Width, Offset); - Base = bi; - Width = 1; - Offset = BI->getBitNum(); - } else { - ++Width; - } - } - - if (Base != ~0U) - OpInfo.addField(Base, Width, Offset); - + addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); if (OpInfo.numFields() > 0) InsnOperands.push_back(OpInfo); } } - Operands[Opc] = InsnOperands; #if 0 From c050dd4717ec4317bd45adfca8243cb9ea7b6370 Mon Sep 17 00:00:00 2001 From: Hanhan Wang Date: Wed, 2 Nov 2022 11:02:48 -0700 Subject: [PATCH 060/516] [mlir][linalg] Add support for vectorizing convs that have different types. 
Reviewed By: dcaballe Differential Revision: https://reviews.llvm.org/D137208 --- .../Linalg/Transforms/Vectorization.cpp | 2 +- .../Dialect/Linalg/vectorize-convolution.mlir | 64 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index d565efb30241d..cedec72b9cb33 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1465,7 +1465,7 @@ struct Conv1DGenerator : public StructuredGenerator { return; for (Value operand : mulOp->getOperands()) { if (Operation *def = operand.getDefiningOp()) { - if (!isa(def)) + if (!isa(def)) return; operand = def->getOperand(0); } diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir index e7495765b3ec7..1374c996128a1 100644 --- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir @@ -61,6 +61,70 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x // ----- +// The i8i8i32 case is similar to f32 case, so checking one case is enough for +// test coverage. 
+func.func @conv1d_nwc_4x2x8_i8i8i32_memref(%input: memref<4x6x3xi8>, %filter: memref<1x3x8xi8>, %output: memref<4x2x8xi32>) { + linalg.conv_1d_nwc_wcf + {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} + ins(%input, %filter : memref<4x6x3xi8>, memref<1x3x8xi8>) + outs(%output : memref<4x2x8xi32>) + return +} + +// CHECK: #[[INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK: #[[FILTER_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK: #[[OUTPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> + +// CHECK: func @conv1d_nwc_4x2x8_i8i8i32_memref +// CHECK-SAME: (%[[INPUT:.+]]: memref<4x6x3xi8>, %[[FILTER:.+]]: memref<1x3x8xi8>, %[[OUTPUT:.+]]: memref<4x2x8xi32>) + +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C0_I8:.+]] = arith.constant 0 : i8 +// CHECK-DAG: %[[C0_I32:.+]] = arith.constant 0 : i32 + +/// Read the whole data in one shot. +// CHECK-DAG: %[[V_INPUT_R:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[C0_I8]] +// CHECK-DAG: %[[V_FILTER_R:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[C0_I8]] +// CHECK-DAG: %[[V_OUTPUT_R:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[C0_I32]] + +// CHECK: %[[V_INPUT_0:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8> +// CHECK: %[[V_INPUT_1:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8> + +// CHECK: %[[V_FILTER:.+]] = vector.extract %[[V_FILTER_R]][0] : vector<1x3x8xi8> + +// CHECK: %[[V_OUTPUT_0:.+]] = vector.extract_strided_slice %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xi32> to vector<4x1x8xi32> +// CHECK: %[[V_OUTPUT_1:.+]] = vector.extract_strided_slice 
%[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 1, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xi32> to vector<4x1x8xi32> + +/// w == 0, kw == 0 +// CHECK: %[[CONTRACT_0:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER]], %[[V_OUTPUT_0]] +// CHECK-SAME: : vector<4x1x3xi8>, vector<3x8xi8> into vector<4x1x8xi32> + +/// w == 1, kw == 0 +// CHECK: %[[CONTRACT_1:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER]], %[[V_OUTPUT_1]] +// CHECK-SAME: : vector<4x1x3xi8>, vector<3x8xi8> into vector<4x1x8xi32> + +/// w == 0, kw == 0 +// CHECK: %[[RES_0:.+]] = vector.insert_strided_slice %[[CONTRACT_0]], %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x8xi32> into vector<4x2x8xi32> +/// w == 1, kw == 0 +// CHECK: %[[RES_1:.+]] = vector.insert_strided_slice %[[CONTRACT_1]], %[[RES_0]] +// CHECK-SAME: {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x8xi32> into vector<4x2x8xi32> + +// Write the result back in one shot. +// CHECK: vector.transfer_write %[[RES_1]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] + +// ----- + func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<2x3x8xf32>, %output: memref<4x2x8xf32>) { linalg.conv_1d_nwc_wcf {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} From b478d8b966672f2b96ce343ea6773a956b7da8e3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 2 Nov 2022 18:13:34 +0000 Subject: [PATCH 061/516] [ConstraintElimination] Generate true/false vectors for vector cmps. This fixes crashes when vector compares can be simplified to true/false. 
--- .../Scalar/ConstraintElimination.cpp | 14 ++++++++++-- .../ConstraintElimination/geps-ptrvector.ll | 22 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 8b9076aff8fa9..375aa4e2cd440 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -746,6 +746,12 @@ void State::addInfoFor(BasicBlock &BB) { WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true); } +static Constant *getScalarConstOrSplat(ConstantInt *C, Type *Ty) { + if (auto *VTy = dyn_cast(Ty)) + return ConstantVector::getSplat(VTy->getElementCount(), C); + return C; +} + static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n"); CmpInst::Predicate Pred = Cmp->getPredicate(); @@ -780,7 +786,9 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { dbgs() << "Condition " << *Cmp << " implied by dominating constraints\n"; dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); - Cmp->replaceUsesWithIf(ConstantInt::getTrue(Ctx), [](Use &U) { + Constant *TrueC = + getScalarConstOrSplat(ConstantInt::getTrue(Ctx), Cmp->getType()); + Cmp->replaceUsesWithIf(TrueC, [](Use &U) { // Conditions in an assume trivially simplify to true. Skip uses // in assume calls to not destroy the available information. auto *II = dyn_cast(U.getUser()); @@ -797,7 +805,9 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { dbgs() << "Condition !" 
<< *Cmp << " implied by dominating constraints\n"; dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); - Cmp->replaceAllUsesWith(ConstantInt::getFalse(Ctx)); + Constant *FalseC = + getScalarConstOrSplat(ConstantInt::getFalse(Ctx), Cmp->getType()); + Cmp->replaceAllUsesWith(FalseC); NumCondsRemoved++; Changed = true; } diff --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll index e30830fff7c76..df915653e08e1 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll @@ -12,3 +12,25 @@ define <2 x i1> @test.vectorgep(<2 x ptr> %vec) { %cond = icmp ule <2 x ptr> %gep, zeroinitializer ret <2 x i1> %cond } + +define <2 x i1> @test.vectorgep.ult.true(<2 x ptr> %vec) { +; CHECK-LABEL: @test.vectorgep.ult.true( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, <2 x ptr> [[VEC:%.*]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult <2 x ptr> [[VEC]], [[GEP_1]] +; CHECK-NEXT: ret <2 x i1> +; + %gep.1 = getelementptr inbounds i32, <2 x ptr> %vec, i64 1 + %t.1 = icmp ult <2 x ptr> %vec, %gep.1 + ret <2 x i1> %t.1 +} + +define <2 x i1> @test.vectorgep.ult.false(<2 x ptr> %vec) { +; CHECK-LABEL: @test.vectorgep.ult.false( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, <2 x ptr> [[VEC:%.*]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult <2 x ptr> [[GEP_1]], [[VEC]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer +; + %gep.1 = getelementptr inbounds i32, <2 x ptr> %vec, i64 1 + %t.1 = icmp ult <2 x ptr> %gep.1, %vec + ret <2 x i1> %t.1 +} From ea82ddd070b78bbdb94b3a2a3f0fdeb2738f1845 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Wed, 2 Nov 2022 11:21:34 -0700 Subject: [PATCH 062/516] [TSan] Adjust `TSAN_RTL_CFLAGS` before it gets copied Add `COMPILER_RT_LIBDISPATCH_CFLAGS` to `TSAN_RTL_CFLAGS` before it gets duplicated to `TSAN_RTL_DYNAMIC_CFLAGS` so both versions have the necessary flags. 
Reviewed By: wrotki, rsundahl Differential Revision: https://reviews.llvm.org/D137183 --- compiler-rt/lib/tsan/rtl/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index 0a12cb7021f69..84747a552e79f 100644 --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -6,6 +6,8 @@ append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=530 TSAN_RTL_CFLAGS) append_list_if(COMPILER_RT_HAS_WGLOBAL_CONSTRUCTORS_FLAG -Wglobal-constructors TSAN_RTL_CFLAGS) +append_list_if(COMPILER_RT_INTERCEPT_LIBDISPATCH ${COMPILER_RT_LIBDISPATCH_CFLAGS} + TSAN_RTL_CFLAGS) set(TSAN_RTL_DYNAMIC_CFLAGS ${TSAN_RTL_CFLAGS}) list(REMOVE_ITEM TSAN_RTL_DYNAMIC_CFLAGS -fPIE) @@ -75,7 +77,6 @@ if(COMPILER_RT_INTERCEPT_LIBDISPATCH) list(APPEND TSAN_SOURCES tsan_interceptors_libdispatch.cpp ) - list(APPEND TSAN_RTL_CFLAGS ${COMPILER_RT_LIBDISPATCH_CFLAGS}) endif() set(TSAN_HEADERS From 65b130e32cad56e5f11d6d172eb90437e1cedbf6 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 2 Nov 2022 14:32:47 -0400 Subject: [PATCH 063/516] Fix LLVM sphinx build bot This should address the issue found in: https://lab.llvm.org/buildbot/#/builders/30/builds/27824 --- llvm/docs/SourceLevelDebugging.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index 35e1816b47c49..07468b1e75f17 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -253,6 +253,10 @@ the complex expression derives the direct value. ``llvm.dbg.assign`` ^^^^^^^^^^^^^^^^^^^ +.. toctree:: + :hidden: + + AssignmentTracking .. 
code-block:: llvm From 145d2a50b86fabe220284ca4a28b67cb8d26f1f1 Mon Sep 17 00:00:00 2001 From: yijiagu Date: Wed, 2 Nov 2022 11:27:26 -0700 Subject: [PATCH 064/516] Add Async Function to the Async Dialect Add Async Function to the Async Dialect Today `async.execute` operation semantics requires attached region to be executed in a thread managed by the runtime, and always returns an `!async.token` result. We need to model async functions that are not necessarily executed in runtime-managed threads, but eventually lowered to llvm coroutines. Example: ``` async.func @foo(%arg0: !async.value<f32>) -> !async.token { %0 = async.await %arg0: !async.value<f32> "do_something_with_f32"(%0) return } ``` If `arg0` is available this function will be executed in the caller thread. If it's not available it will be suspended and resumed later on a thread managed by the async runtime. Currently this is not representable with `async.execute` operations. The longer term goal is to make async dialect more like https://github.com/lewissbaker/cppcoro to be able to represent structured host concurrency in MLIR.
(1) Add async.func, async.call, and async.return operations in Async Dialect Reviewed By: ezhulenev, rriddle Differential Revision: https://reviews.llvm.org/D137189 --- mlir/include/mlir/Dialect/Async/IR/Async.h | 19 +- .../include/mlir/Dialect/Async/IR/AsyncOps.td | 188 ++++++++++++++++++ mlir/lib/Dialect/Async/IR/Async.cpp | 131 ++++++++++++ mlir/test/Dialect/Async/ops.mlir | 31 +++ mlir/test/Dialect/Async/verify.mlir | 26 +++ .../llvm-project-overlay/mlir/BUILD.bazel | 2 + 6 files changed, 396 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Async/IR/Async.h b/mlir/include/mlir/Dialect/Async/IR/Async.h index 0c60a3c06c131..585a231d24739 100644 --- a/mlir/include/mlir/Dialect/Async/IR/Async.h +++ b/mlir/include/mlir/Dialect/Async/IR/Async.h @@ -18,9 +18,11 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" -#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/FunctionInterfaces.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" @@ -53,4 +55,19 @@ inline bool isRefCounted(Type type) { } // namespace async } // namespace mlir +namespace llvm { + +/// Allow stealing the low bits of async::FuncOp. 
+template <> +struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(mlir::async::FuncOp val) { + return const_cast(val.getAsOpaquePointer()); + } + static inline mlir::async::FuncOp getFromVoidPointer(void *p) { + return mlir::async::FuncOp::getFromOpaquePointer(p); + } + static constexpr int numLowBitsAvailable = 3; +}; +} // namespace llvm + #endif // MLIR_DIALECT_ASYNC_IR_ASYNC_H diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td index d0584ef34b8e0..c8d3e2cc664b4 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td @@ -18,6 +18,11 @@ include "mlir/Dialect/Async/IR/AsyncTypes.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Interfaces/CallInterfaces.td" +include "mlir/IR/SymbolInterfaces.td" +include "mlir/IR/FunctionInterfaces.td" +include "mlir/IR/OpAsmInterface.td" + //===----------------------------------------------------------------------===// // Async op definitions @@ -99,6 +104,189 @@ def Async_ExecuteOp : }]; } +def Async_FuncOp : Async_Op<"func", + [CallableOpInterface, FunctionOpInterface, + IsolatedFromAbove, OpAsmOpInterface, Symbol]> { + let summary = "async function operation"; + let description = [{ + An async function is like a normal function, but supports non-blocking + await. Internally, async function is lowered to the LLVM coroutinue with + async runtime intrinsic. It can return an async token and/or async values. + The token represents the execution state of async function and can be used + when users want to express dependencies on some side effects, e.g., + the token becomes available once every thing in the func body is executed. + + Example: + + ```mlir + // Async function can't return void, it always must be some async thing. 
+ async.func @async.0() -> !async.token { + return + } + + // Function returns only async value. + async.func @async.1() -> !async.value { + %0 = arith.constant 42 : i32 + return %0 : i32 + } + + // Implicit token can be added to return types. + async.func @async.2() -> !async.token, !async.value { + %0 = arith.constant 42 : i32 + return %0 : i32 + } + ``` + }]; + + let arguments = (ins SymbolNameAttr:$sym_name, + TypeAttrOf:$function_type, + OptionalAttr:$sym_visibility); + + let regions = (region AnyRegion:$body); + + let builders = [ + OpBuilder<(ins "StringRef":$name, "FunctionType":$type, + CArg<"ArrayRef", "{}">:$attrs, + CArg<"ArrayRef", "{}">:$argAttrs)> + ]; + + let extraClassDeclaration = [{ + //===------------------------------------------------------------------===// + // CallableOpInterface + //===------------------------------------------------------------------===// + + /// Returns the region on the current operation that is callable. This may + /// return null in the case of an external callable object, e.g. an external + /// function. + ::mlir::Region *getCallableRegion() { return isExternal() ? nullptr + : &getBody(); } + + /// Returns the results types that the callable region produces when + /// executed. + ArrayRef getCallableResults() { return getFunctionType() + .getResults(); } + + //===------------------------------------------------------------------===// + // FunctionOpInterface Methods + //===------------------------------------------------------------------===// + + /// Returns the argument types of this async function. + ArrayRef getArgumentTypes() { return getFunctionType().getInputs(); } + + /// Returns the result types of this async function. 
+ ArrayRef getResultTypes() { return getFunctionType().getResults(); } + + /// Returns the number of results of this async function + unsigned getNumResults() {return getResultTypes().size();} + + /// Is the async func stateful + bool isStateful() { return isa(getFunctionType().getResult(0));} + + //===------------------------------------------------------------------===// + // OpAsmOpInterface Methods + //===------------------------------------------------------------------===// + + /// Allow the dialect prefix to be omitted. + static StringRef getDefaultDialect() { return "async"; } + + //===------------------------------------------------------------------===// + // SymbolOpInterface Methods + //===------------------------------------------------------------------===// + + bool isDeclaration() { return isExternal(); } + }]; + let hasCustomAssemblyFormat = 1; + + let hasVerifier = 1; +} + +def Async_CallOp : Async_Op<"call", + [CallOpInterface, DeclareOpInterfaceMethods]> { + let summary = "async call operation"; + let description = [{ + The `async.call` operation represents a direct call to an async function + that is within the same symbol scope as the call. The operands and result + types of the call must match the specified async function type. The callee + is encoded as a symbol reference attribute named "callee". 
+ + Example: + + ```mlir + %2 = async.call @my_add(%0, %1) : (f32, f32) -> !async.value + ``` + }]; + + let arguments = (ins FlatSymbolRefAttr:$callee, Variadic:$operands); + let results = (outs Variadic); + + let builders = [ + OpBuilder<(ins "FuncOp":$callee, CArg<"ValueRange", "{}">:$operands), [{ + $_state.addOperands(operands); + $_state.addAttribute("callee", SymbolRefAttr::get(callee)); + $_state.addTypes(callee.getFunctionType().getResults()); + }]>, + OpBuilder<(ins "SymbolRefAttr":$callee, "TypeRange":$results, + CArg<"ValueRange", "{}">:$operands), [{ + $_state.addOperands(operands); + $_state.addAttribute("callee", callee); + $_state.addTypes(results); + }]>, + OpBuilder<(ins "StringAttr":$callee, "TypeRange":$results, + CArg<"ValueRange", "{}">:$operands), [{ + build($_builder, $_state, SymbolRefAttr::get(callee), results, operands); + }]>, + OpBuilder<(ins "StringRef":$callee, "TypeRange":$results, + CArg<"ValueRange", "{}">:$operands), [{ + build($_builder, $_state, StringAttr::get($_builder.getContext(), callee), + results, operands); + }]> + ]; + + let extraClassDeclaration = [{ + FunctionType getCalleeType(); + + /// Get the argument operands to the called function. + operand_range getArgOperands() { + return {arg_operand_begin(), arg_operand_end()}; + } + + operand_iterator arg_operand_begin() { return operand_begin(); } + operand_iterator arg_operand_end() { return operand_end(); } + + /// Return the callee of this operation. + CallInterfaceCallable getCallableForCallee() { + return (*this)->getAttrOfType("callee"); + } + }]; + + let assemblyFormat = [{ + $callee `(` $operands `)` attr-dict `:` functional-type($operands, results) + }]; +} + +def Async_ReturnOp : Async_Op<"return", + [Pure, HasParent<"FuncOp">, ReturnLike, Terminator]> { + let summary = "Async function return operation"; + let description = [{ + The `async.return` is a special terminator operation for Async function. 
+ + Example: + + ```mlir + async.func @foo() : !async.token { + return + } + ``` + }]; + + let arguments = (ins Variadic:$operands); + + let builders = [OpBuilder<(ins), [{build($_builder, $_state, llvm::None);}]>]; + + let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; + let hasVerifier = 1; +} + def Async_YieldOp : Async_Op<"yield", [ HasParent<"ExecuteOp">, Pure, Terminator, diff --git a/mlir/lib/Dialect/Async/IR/Async.cpp b/mlir/lib/Dialect/Async/IR/Async.cpp index 4b5d6a1d78fe1..fbbb7357610e5 100644 --- a/mlir/lib/Dialect/Async/IR/Async.cpp +++ b/mlir/lib/Dialect/Async/IR/Async.cpp @@ -8,7 +8,10 @@ #include "mlir/Dialect/Async/IR/Async.h" +#include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/FunctionImplementation.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -320,6 +323,134 @@ LogicalResult AwaitOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// FuncOp +//===----------------------------------------------------------------------===// + +void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name, + FunctionType type, ArrayRef attrs, + ArrayRef argAttrs) { + state.addAttribute(SymbolTable::getSymbolAttrName(), + builder.getStringAttr(name)); + state.addAttribute(FunctionOpInterface::getTypeAttrName(), + TypeAttr::get(type)); + + state.attributes.append(attrs.begin(), attrs.end()); + state.addRegion(); + + if (argAttrs.empty()) + return; + assert(type.getNumInputs() == argAttrs.size()); + function_interface_impl::addArgAndResultAttrs(builder, state, argAttrs, + /*resultAttrs=*/llvm::None); +} + +ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) { + auto buildFuncType = + [](Builder &builder, ArrayRef argTypes, ArrayRef results, + function_interface_impl::VariadicFlag, + std::string &) { return builder.getFunctionType(argTypes, results); }; + 
+ return function_interface_impl::parseFunctionOp( + parser, result, /*allowVariadic=*/false, buildFuncType); +} + +void FuncOp::print(OpAsmPrinter &p) { + function_interface_impl::printFunctionOp(p, *this, /*isVariadic=*/false); +} + +/// Check that the result type of async.func is not void and must be +/// some async token or async values. +LogicalResult FuncOp::verify() { + auto resultTypes = getResultTypes(); + if (resultTypes.empty()) + return emitOpError() + << "result is expected to be at least of size 1, but got " + << resultTypes.size(); + + for (unsigned i = 0, e = resultTypes.size(); i != e; ++i) { + auto type = resultTypes[i]; + if (!type.isa() && !type.isa()) + return emitOpError() << "result type must be async value type or async " + "token type, but got " + << type; + // We only allow AsyncToken appear as the first return value + if (type.isa() && i != 0) { + return emitOpError() + << " results' (optional) async token type is expected " + "to appear as the 1st return value, but got " + << i + 1; + } + } + + return success(); +} + +//===----------------------------------------------------------------------===// +/// CallOp +//===----------------------------------------------------------------------===// + +LogicalResult CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + // Check that the callee attribute was specified. + auto fnAttr = (*this)->getAttrOfType("callee"); + if (!fnAttr) + return emitOpError("requires a 'callee' symbol reference attribute"); + FuncOp fn = symbolTable.lookupNearestSymbolFrom(*this, fnAttr); + if (!fn) + return emitOpError() << "'" << fnAttr.getValue() + << "' does not reference a valid async function"; + + // Verify that the operand and result types match the callee. 
+ auto fnType = fn.getFunctionType(); + if (fnType.getNumInputs() != getNumOperands()) + return emitOpError("incorrect number of operands for callee"); + + for (unsigned i = 0, e = fnType.getNumInputs(); i != e; ++i) + if (getOperand(i).getType() != fnType.getInput(i)) + return emitOpError("operand type mismatch: expected operand type ") + << fnType.getInput(i) << ", but provided " + << getOperand(i).getType() << " for operand number " << i; + + if (fnType.getNumResults() != getNumResults()) + return emitOpError("incorrect number of results for callee"); + + for (unsigned i = 0, e = fnType.getNumResults(); i != e; ++i) + if (getResult(i).getType() != fnType.getResult(i)) { + auto diag = emitOpError("result type mismatch at index ") << i; + diag.attachNote() << " op result types: " << getResultTypes(); + diag.attachNote() << "function result types: " << fnType.getResults(); + return diag; + } + + return success(); +} + +FunctionType CallOp::getCalleeType() { + return FunctionType::get(getContext(), getOperandTypes(), getResultTypes()); +} + +//===----------------------------------------------------------------------===// +/// ReturnOp +//===----------------------------------------------------------------------===// + +LogicalResult ReturnOp::verify() { + auto funcOp = (*this)->getParentOfType(); + ArrayRef resultTypes = funcOp.isStateful() + ? funcOp.getResultTypes().drop_front() + : funcOp.getResultTypes(); + // Get the underlying value types from async types returned from the + // parent `async.func` operation. 
+ auto types = llvm::map_range(resultTypes, [](const Type &result) { + return result.cast().getValueType(); + }); + + if (getOperandTypes() != types) + return emitOpError("operand types do not match the types returned from " + "the parent FuncOp"); + + return success(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Async/ops.mlir b/mlir/test/Dialect/Async/ops.mlir index c391fffd87ebc..36f40d5d074ff 100644 --- a/mlir/test/Dialect/Async/ops.mlir +++ b/mlir/test/Dialect/Async/ops.mlir @@ -136,3 +136,34 @@ func.func @create_group_and_await_all(%arg0: !async.token, %3 = arith.addi %1, %2 : index return %3 : index } + +// CHECK-LABEL: @async_func_return_token +async.func @async_func_return_token() -> !async.token { + // CHECK: return + return +} + +// CHECK-LABEL: @async_func_return_value +async.func @async_func_return_value() -> !async.value { + %0 = arith.constant 42 : i32 + // CHECK: return %[[value:.*]] : i32 + return %0 : i32 +} + +// CHECK-LABEL: @async_func_return_optional_token +async.func @async_func_return_optional_token() -> (!async.token, !async.value) { + %0 = arith.constant 42 : i32 + // CHECK: return %[[value:.*]] : i32 + return %0 : i32 +} + +// CHECK-LABEL: @async_call +func.func @async_call() { + // CHECK: async.call @async_func_return_token + // CHECK: async.call @async_func_return_value + // CHECK: async.call @async_func_return_optional_token + %0 = async.call @async_func_return_token() : () -> !async.token + %1 = async.call @async_func_return_value() : () -> !async.value + %2, %3 = async.call @async_func_return_optional_token() : () -> (!async.token, !async.value) + return +} diff --git a/mlir/test/Dialect/Async/verify.mlir b/mlir/test/Dialect/Async/verify.mlir index 7ec3528abb655..69387a77d9695 100644 --- a/mlir/test/Dialect/Async/verify.mlir +++ 
b/mlir/test/Dialect/Async/verify.mlir @@ -19,3 +19,29 @@ func.func @wrong_async_await_result_type(%arg0: !async.value) { // expected-error @+1 {{'async.await' op result type 'f64' does not match async value type 'f32'}} %0 = "async.await"(%arg0): (!async.value) -> f64 } + + +// ----- +// expected-error @+1 {{'async.func' op result is expected to be at least of size 1, but got 0}} +async.func @wrong_async_func_void_result_type(%arg0: f32) { + return +} + + +// ----- +// expected-error @+1 {{'async.func' op result type must be async value type or async token type, but got 'f32'}} +async.func @wrong_async_func_result_type(%arg0: f32) -> f32 { + return %arg0 : f32 +} + +// ----- +// expected-error @+1 {{'async.func' op results' (optional) async token type is expected to appear as the 1st return value, but got 2}} +async.func @wrong_async_func_token_type_placement(%arg0: f32) -> (!async.value, !async.token) { + return %arg0 : f32 +} + +// ----- +async.func @wrong_async_func_return_type(%arg0: f32) -> (!async.token, !async.value) { + // expected-error @+1 {{'async.return' op operand types do not match the types returned from the parent FuncOp}} + return %arg0 : f32 +} diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 010fb851956d3..2208ff0ddf7e6 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1349,7 +1349,9 @@ td_library( ], includes = ["include"], deps = [ + ":CallInterfacesTdFiles", ":ControlFlowInterfacesTdFiles", + ":FunctionInterfacesTdFiles", ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", From 990c189379679f92cd9af4cd384e476a94c0e819 Mon Sep 17 00:00:00 2001 From: Tom Praschan <13141438+tom-anders@users.noreply.github.com> Date: Mon, 31 Oct 2022 21:36:18 +0100 Subject: [PATCH 065/516] [clangd] Add scoped enum constants to all-scopes-completion This was originally part of 
https://reviews.llvm.org/D136925, but we decided to move it to a separate patch. In case it turns out to be controversial, it can be reverted more easily. Differential Revision: https://reviews.llvm.org/D137104 --- clang-tools-extra/clangd/CodeComplete.cpp | 2 +- clang-tools-extra/clangd/CodeComplete.h | 2 +- .../clangd/unittests/CodeCompleteTests.cpp | 12 +++++++----- .../clangd/unittests/SymbolCollectorTests.cpp | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index a3e518b4ba054..e52cb2643babd 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -2145,7 +2145,7 @@ bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) { // when // --all-scopes-completion is set, we'll want to complete those as well. if (const auto *EnumDecl = dyn_cast(ND.getDeclContext())) - return (InTopLevelScope(*EnumDecl) || InClassScope(*EnumDecl)) && !EnumDecl->isScoped(); + return (InTopLevelScope(*EnumDecl) || InClassScope(*EnumDecl)); return false; } diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h index 269be8944df17..19ef4c17d3b0f 100644 --- a/clang-tools-extra/clangd/CodeComplete.h +++ b/clang-tools-extra/clangd/CodeComplete.h @@ -291,7 +291,7 @@ SignatureHelp signatureHelp(PathRef FileName, Position Pos, // For index-based completion, we only consider: // * symbols in namespaces or translation unit scopes (e.g. no class // members, no locals) -// * enum constants in unscoped enum decl (e.g. 
"red" in "enum {red};") +// * enum constants (both scoped and unscoped) // * primary templates (no specializations) // For the other cases, we let Clang do the completion because it does not // need any non-local information and it will be much better at following diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 77451bf445e0f..99d09ad43466a 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -3382,11 +3382,13 @@ TEST(CompletionTest, Enums) { Opts.Index = Index.get(); Opts.AllScopes = true; auto R = completions(Source, {}, Opts); - EXPECT_THAT(R.Completions, - ElementsAre(AllOf(scope("ns::"), named("Clangd1"), - kind(CompletionItemKind::EnumMember)), - AllOf(scope("ns::C::"), named("Clangd2"), - kind(CompletionItemKind::EnumMember)))); + EXPECT_THAT(R.Completions, UnorderedElementsAre( + AllOf(scope("ns::"), named("Clangd1"), + kind(CompletionItemKind::EnumMember)), + AllOf(scope("ns::C::"), named("Clangd2"), + kind(CompletionItemKind::EnumMember)), + AllOf(scope("ns::Scoped::"), named("Clangd3"), + kind(CompletionItemKind::EnumMember)))); } TEST(CompletionTest, ScopeIsUnresolved) { diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index bb651b851afeb..62564b989a186 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -1329,7 +1329,7 @@ TEST_F(SymbolCollectorTest, IncludeEnums) { AllOf(qName("Color"), forCodeCompletion(true)), AllOf(qName("Green"), forCodeCompletion(true)), AllOf(qName("Color2"), forCodeCompletion(true)), - AllOf(qName("Color2::Yellow"), forCodeCompletion(false)), + AllOf(qName("Color2::Yellow"), forCodeCompletion(true)), AllOf(qName("ns"), forCodeCompletion(true)), AllOf(qName("ns::Black"), 
forCodeCompletion(true)), AllOf(qName("Color3"), forCodeCompletion(true)), From a3463a9f5cb0880357fd3e4c47ed4c2b09f0e6fc Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Thu, 27 Oct 2022 19:45:26 +0100 Subject: [PATCH 066/516] [OpenMP][OpenMPIRBuilder] Migrate loadOffloadInfoMetadata from clang to OMPIRbuilder This patch moves the implementation of the loadOffloadInfoMetadata to the OMPIRbuilder. Differential Revision: https://reviews.llvm.org/D136872 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 47 +----------------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 13 +++++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 48 +++++++++++++++++++ 3 files changed, 62 insertions(+), 46 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 122006f667ed2..9a2fc93ce40c6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3171,52 +3171,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { return; } - llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); - if (!MD) - return; - - for (llvm::MDNode *MN : MD->operands()) { - auto &&GetMDInt = [MN](unsigned Idx) { - auto *V = cast(MN->getOperand(Idx)); - return cast(V->getValue())->getZExtValue(); - }; - - auto &&GetMDString = [MN](unsigned Idx) { - auto *V = cast(MN->getOperand(Idx)); - return V->getString(); - }; - - switch (GetMDInt(0)) { - default: - llvm_unreachable("Unexpected metadata!"); - break; - case llvm::OffloadEntriesInfoManager::OffloadEntryInfo:: - OffloadingEntryInfoTargetRegion: { - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the " - "device code generation."); - llvm::TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3), - /*DeviceID=*/GetMDInt(1), - /*FileID=*/GetMDInt(2), - /*Line=*/GetMDInt(4)); - OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - EntryInfo, /*Order=*/GetMDInt(5)); - break; - } - case 
llvm::OffloadEntriesInfoManager::OffloadEntryInfo:: - OffloadingEntryInfoDeviceGlobalVar: - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the " - "device code generation."); - OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( - /*MangledName=*/GetMDString(1), - static_cast< - llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>( - /*Flags=*/GetMDInt(2)), - /*Order=*/GetMDInt(3)); - break; - } - } + OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); } void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 8ba71d55584a2..3f9fa6d3c8147 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1681,6 +1681,19 @@ class OpenMPIRBuilder { BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name = {}); + /// OMP Offload Info Metadata name string + const std::string ompOffloadInfoName = "omp_offload.info"; + + /// Loads all the offload entries information from the host IR + /// metadata. This function is only meant to be used with device code + /// generation. + /// + /// \param M Module to load Metadata info from. Module passed maybe + /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module. + /// \param OffloadEntriesInfoManager Initialize Offload Entry information. 
+ void + loadOffloadInfoMetadata(Module &M, + OffloadEntriesInfoManager &OffloadEntriesInfoManager); }; /// Data structure to contain the information needed to uniquely identify diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 6a3700e3adb70..5052ddc5dde88 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4705,6 +4705,54 @@ void TargetRegionEntryInfo::getTargetRegionEntryFnName( getTargetRegionEntryFnName(Name, ParentName, DeviceID, FileID, Line); } +/// Loads all the offload entries information from the host IR +/// metadata. +void OpenMPIRBuilder::loadOffloadInfoMetadata( + Module &M, OffloadEntriesInfoManager &OffloadEntriesInfoManager) { + // If we are in target mode, load the metadata from the host IR. This code has + // to match the metadata creation in createOffloadEntriesAndInfoMetadata(). + + NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName); + if (!MD) + return; + + for (MDNode *MN : MD->operands()) { + auto &&GetMDInt = [MN](unsigned Idx) { + auto *V = cast(MN->getOperand(Idx)); + return cast(V->getValue())->getZExtValue(); + }; + + auto &&GetMDString = [MN](unsigned Idx) { + auto *V = cast(MN->getOperand(Idx)); + return V->getString(); + }; + + switch (GetMDInt(0)) { + default: + llvm_unreachable("Unexpected metadata!"); + break; + case OffloadEntriesInfoManager::OffloadEntryInfo:: + OffloadingEntryInfoTargetRegion: { + TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3), + /*DeviceID=*/GetMDInt(1), + /*FileID=*/GetMDInt(2), + /*Line=*/GetMDInt(4)); + OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( + EntryInfo, /*Order=*/GetMDInt(5)); + break; + } + case OffloadEntriesInfoManager::OffloadEntryInfo:: + OffloadingEntryInfoDeviceGlobalVar: + OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( + /*MangledName=*/GetMDString(1), + static_cast( + /*Flags=*/GetMDInt(2)), + /*Order=*/GetMDInt(3)); + break; + } + } 
+} + bool OffloadEntriesInfoManager::empty() const { return OffloadEntriesTargetRegion.empty() && OffloadEntriesDeviceGlobalVar.empty(); From 001d18664f8bcf63af64f10688809f7681dfbf0b Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Wed, 2 Nov 2022 12:29:24 -0400 Subject: [PATCH 067/516] [bolt] update bazel rules for f71d32a0eea4 Differential Revision: https://reviews.llvm.org/D137281 --- .../bazel/llvm-project-overlay/bolt/BUILD.bazel | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel b/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel index d2ffeedec1967..bc2dd02e653e8 100644 --- a/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel @@ -2,6 +2,8 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") + package( default_visibility = ["//visibility:public"], ) @@ -91,14 +93,24 @@ cc_library( ], ) +expand_template( + name = "RuntimeLibraryVariables_inc", + out = "include/bolt/RuntimeLibs/RuntimeLibraryVariables.inc", + substitutions = { + # FIXME this is a total guess + "@LLVM_LIBDIR_SUFFIX@": "lib", + }, + template = "include/bolt/RuntimeLibs/RuntimeLibraryVariables.inc.in", +) + cc_library( name = "RuntimeLibs", srcs = glob([ "lib/RuntimeLibs/*.cpp", ]), - hdrs = glob([ + textual_hdrs = glob([ "include/bolt/RuntimeLibs/*.h", - ]), + ]) + ["include/bolt/RuntimeLibs/RuntimeLibraryVariables.inc"], includes = ["include"], deps = [ ":Core", From 29378ab24b98137b4959034a0882c3bbd97c46e4 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 9 Aug 2022 13:17:30 +0200 Subject: [PATCH 068/516] [libc++] Implement P2438R2 (std::string::substr() &&) This doesn't affect our ABI because `std::string::substr()` isn't in the dylib and the mangling of `substr() const` and `substr() const&` are different. 
Reviewed By: ldionne, Mordante, var-const, avogelsgesang, #libc Spies: arphaman, huixie90, libcxx-commits Differential Revision: https://reviews.llvm.org/D131668 --- libcxx/docs/ReleaseNotes.rst | 1 + libcxx/docs/Status/Cxx2bPapers.csv | 2 +- libcxx/include/string | 65 ++++- .../debug.iterator.substr.pass.cpp | 49 ++++ .../debug.iterator.index.pass.cpp | 1 + .../string.cons/substr_rvalue.pass.cpp | 233 ++++++++++++++++++ .../string.ops/string_substr/substr.pass.cpp | 189 +++++--------- .../string_substr/substr_rvalue.pass.cpp | 103 ++++++++ libcxx/test/support/count_new.h | 34 +++ libcxx/test/support/make_string.h | 2 +- 10 files changed, 543 insertions(+), 136 deletions(-) create mode 100644 libcxx/test/libcxx/strings/basic.string/string.cons/debug.iterator.substr.pass.cpp create mode 100644 libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp create mode 100644 libcxx/test/std/strings/basic.string/string.ops/string_substr/substr_rvalue.pass.cpp diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst index d10fdde8d719f..abcebeb01ebee 100644 --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -46,6 +46,7 @@ Implemented Papers ``from_chars`` for Integral Types in ```` Header - P0220R1 - Adopt Library Fundamentals V1 TS Components for C++17 - P0482R6 - char8_t: A type for UTF-8 characters and strings +- P2438R2 - ``std::string::substr() &&`` Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv index 7017c31a92c41..f40cca9c7ac60 100644 --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -71,7 +71,7 @@ "`P2408R5 `__","LWG","Ranges iterators as inputs to non-Ranges algorithms","July 2022","","" "`P2417R2 `__","LWG","A more ``constexpr`` ``bitset``","July 2022","|Complete|","16.0" "`P2419R2 `__","LWG","Clarify handling of encodings in localized formatting of chrono types","July 2022","","" 
-"`P2438R2 `__","LWG","``std::string::substr() &&``","July 2022","","" +"`P2438R2 `__","LWG","``std::string::substr() &&``","July 2022","|Complete|","16.0" "`P2445R1 `__","LWG","``forward_like``","July 2022","|Complete|","16.0" "`P2446R2 `__","LWG","``views::as_rvalue``","July 2022","","" "`P2460R2 `__","LWG","Relax requirements on ``wchar_t`` to match existing practices","July 2022","","" diff --git a/libcxx/include/string b/libcxx/include/string index 8eb1d30970421..726bba3156f6e 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -109,6 +109,10 @@ public: const allocator_type& a = allocator_type()); // constexpr since C++20 basic_string(const basic_string& str, size_type pos, size_type n, const Allocator& a = Allocator()); // constexpr since C++20 + constexpr basic_string( + basic_string&& str, size_type pos, const Allocator& a = Allocator()); // since C++23 + constexpr basic_string( + basic_string&& str, size_type pos, size_type n, const Allocator& a = Allocator()); // since C++23 template basic_string(const T& t, size_type pos, size_type n, const Allocator& a = Allocator()); // C++17, constexpr since C++20 template @@ -261,8 +265,9 @@ public: basic_string& replace(const_iterator i1, const_iterator i2, initializer_list); // constexpr since C++20 size_type copy(value_type* s, size_type n, size_type pos = 0) const; // constexpr since C++20 - basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr since C++20 - + basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr in C++20, removed in C++23 + basic_string substr(size_type pos = 0, size_type n = npos) const&; // since C++23 + constexpr basic_string substr(size_type pos = 0, size_type n = npos) &&; // since C++23 void swap(basic_string& str) noexcept(allocator_traits::propagate_on_container_swap::value || allocator_traits::is_always_equal::value); // C++17, constexpr since C++20 @@ -897,6 +902,36 @@ public: std::__debug_db_insert_c(this); } +#if 
_LIBCPP_STD_VER > 20 + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string(basic_string&& __str, size_type __pos, const _Allocator& __alloc = _Allocator()) + : basic_string(std::move(__str), __pos, npos, __alloc) {} + + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string(basic_string&& __str, size_type __pos, size_type __n, const _Allocator& __alloc = _Allocator()) + : __r_(__default_init_tag(), __alloc) { + if (__pos > __str.size()) + __throw_out_of_range(); + + auto __len = std::min(__n, __str.size() - __pos); + if (__alloc_traits::is_always_equal::value || __alloc == __str.__alloc()) { + __r_.first() = __str.__r_.first(); + __str.__default_init(); + + _Traits::move(data(), data() + __pos, __len); + __set_size(__len); + _Traits::assign(data()[__len], value_type()); + } else { + // Perform a copy because the allocators are not compatible. + __init(__str.data() + __pos, __len); + } + + std::__debug_db_insert_c(this); + if (__is_long()) + std::__debug_db_swap(this, &__str); + } +#endif + template ::value, nullptr_t> > _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(size_type __n, _CharT __c, const _Allocator& __a); @@ -1324,8 +1359,24 @@ public: #endif // _LIBCPP_CXX03_LANG _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type copy(value_type* __s, size_type __n, size_type __pos = 0) const; + + // TODO: Maybe don't pass in the allocator. 
See https://llvm.org/PR57190 +#if _LIBCPP_STD_VER <= 20 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string substr(size_type __pos = 0, size_type __n = npos) const; + basic_string substr(size_type __pos = 0, size_type __n = npos) const { + return basic_string(*this, __pos, __n, __alloc()); + } +#else + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string substr(size_type __pos = 0, size_type __n = npos) const& { + return basic_string(*this, __pos, __n, __alloc()); + } + + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string substr(size_type __pos = 0, size_type __n = npos) && { + return basic_string(std::move(*this), __pos, __n, __alloc()); + } +#endif _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(basic_string& __str) @@ -3472,14 +3523,6 @@ basic_string<_CharT, _Traits, _Allocator>::copy(value_type* __s, size_type __n, return __rlen; } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator> -basic_string<_CharT, _Traits, _Allocator>::substr(size_type __pos, size_type __n) const -{ - return basic_string(*this, __pos, __n, __alloc()); -} - template inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void diff --git a/libcxx/test/libcxx/strings/basic.string/string.cons/debug.iterator.substr.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.cons/debug.iterator.substr.pass.cpp new file mode 100644 index 0000000000000..8eeb26233acc7 --- /dev/null +++ b/libcxx/test/libcxx/strings/basic.string/string.cons/debug.iterator.substr.pass.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Check that basic_string(basic_string&&, size_type, Allocator) and +// basic_string(basic_string&&, size_type, size_type, Allocator) inserts the container into the debug database + +// REQUIRES: has-unix-headers +// UNSUPPORTED: !libcpp-has-debug-mode, c++03 + +#include +#include + +#include "check_assertion.h" + +int main(int, char**) { + using namespace std::string_literals; + + { + std::string s = {"Banane"s, 1}; + auto i = s.begin(); + assert(i[0] == 'a'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } + { + std::string s = {"Banane"s, 0, 5}; + auto i = s.begin(); + assert(i[0] == 'B'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } + { + std::string s = {"long long string so no SSO"s, 21}; + auto i = s.begin(); + assert(i[0] == 'o'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } + { + std::string s = {"long long string so no SSO"s, 0, 5}; + auto i = s.begin(); + assert(i[0] == 'l'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } +} diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp index f1b4e5666569b..13a2301937309 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp @@ -20,6 +20,7 @@ #include "min_allocator.h" int main(int, char**) { + using T = decltype(uint8_t() - uint8_t()); { typedef std::string C; C c(1, '\0'); diff --git a/libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp 
b/libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp new file mode 100644 index 0000000000000..6a431be5cf851 --- /dev/null +++ b/libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp @@ -0,0 +1,233 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// constexpr basic_string(basic_string&& str, size_type pos, const Allocator& a = Allocator()); +// constexpr basic_string(basic_string&& str, size_type pos, size_type n, const Allocator& a = Allocator()); + +#include +#include + +#include "constexpr_char_traits.h" +#include "count_new.h" +#include "make_string.h" +#include "min_allocator.h" +#include "test_allocator.h" +#include "test_macros.h" + +#define STR(string) MAKE_CSTRING(typename S::value_type, string) + +constexpr struct should_throw_exception_t { +} should_throw_exception; + +template +constexpr void test_string_pos(S orig, typename S::size_type pos, S expected) { +#ifdef _LIBCPP_VERSION + ConstexprDisableAllocationGuard g; +#endif + S substr(std::move(orig), pos); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(orig.empty()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); +} + +template +constexpr void test_string_pos(S orig, typename S::size_type pos, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + [[maybe_unused]] S substr = S(std::move(orig), pos); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; +#endif +} + +template +constexpr void +test_string_pos_alloc(S orig, typename S::size_type pos, const 
typename S::allocator_type& alloc, S expected) { + S substr(std::move(orig), pos, alloc); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); + assert(substr.get_allocator() == alloc); +} + +template +constexpr void test_string_pos_alloc( + S orig, typename S::size_type pos, const typename S::allocator_type& alloc, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + [[maybe_unused]] S substr = S(std::move(orig), pos, alloc); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)alloc; +#endif +} + +template +constexpr void test_string_pos_n(S orig, typename S::size_type pos, typename S::size_type n, S expected) { +#ifdef _LIBCPP_VERSION + ConstexprDisableAllocationGuard g; +#endif + S substr(std::move(orig), pos, n); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(orig.empty()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); +} + +template +constexpr void test_string_pos_n(S orig, typename S::size_type pos, typename S::size_type n, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + [[maybe_unused]] S substr = S(std::move(orig), pos, n); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)n; +#endif +} + +template +constexpr void test_string_pos_n_alloc( + S orig, typename S::size_type pos, typename S::size_type n, const typename S::allocator_type& alloc, S expected) { + S substr(std::move(orig), pos, n, alloc); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); + assert(substr.get_allocator() == alloc); +} + +template +constexpr void test_string_pos_n_alloc( + S orig, + typename S::size_type pos, + typename S::size_type n, + const typename S::allocator_type& alloc, + should_throw_exception_t) { +#ifndef 
TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + [[maybe_unused]] S substr = S(std::move(orig), pos, n, alloc); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)n; + (void)alloc; +#endif +} + +template +constexpr void test_string(const typename S::allocator_type& alloc) { + test_string_pos(STR(""), 0, STR("")); + test_string_pos(STR(""), 1, should_throw_exception); + test_string_pos(STR("Banane"), 1, STR("anane")); + test_string_pos(STR("Banane"), 6, STR("")); + test_string_pos(STR("Banane"), 7, should_throw_exception); + test_string_pos(STR("long long string so no SSO"), 0, STR("long long string so no SSO")); + test_string_pos(STR("long long string so no SSO"), 10, STR("string so no SSO")); + test_string_pos(STR("long long string so no SSO"), 26, STR("")); + test_string_pos(STR("long long string so no SSO"), 27, should_throw_exception); + + test_string_pos_alloc(STR(""), 0, alloc, STR("")); + test_string_pos_alloc(STR(""), 1, alloc, should_throw_exception); + test_string_pos_alloc(STR("Banane"), 1, alloc, STR("anane")); + test_string_pos_alloc(STR("Banane"), 6, alloc, STR("")); + test_string_pos_alloc(STR("Banane"), 7, alloc, should_throw_exception); + test_string_pos_alloc(STR("long long string so no SSO"), 0, alloc, STR("long long string so no SSO")); + test_string_pos_alloc(STR("long long string so no SSO"), 10, alloc, STR("string so no SSO")); + test_string_pos_alloc(STR("long long string so no SSO"), 26, alloc, STR("")); + test_string_pos_alloc(STR("long long string so no SSO"), 27, alloc, should_throw_exception); + + test_string_pos_n(STR(""), 0, 0, STR("")); + test_string_pos_n(STR(""), 0, 1, STR("")); + test_string_pos_n(STR(""), 1, 0, should_throw_exception); + test_string_pos_n(STR(""), 1, 1, should_throw_exception); + test_string_pos_n(STR("Banane"), 1, 10, STR("anane")); + test_string_pos_n(STR("Banane"), 6, 0, STR("")); + test_string_pos_n(STR("Banane"), 6, 5, STR("")); + 
test_string_pos_n(STR("Banane"), 7, 10, should_throw_exception); + test_string_pos_n(STR("long long string so no SSO"), 0, 10, STR("long long ")); + test_string_pos_n(STR("long long string so no SSO"), 10, 8, STR("string s")); + test_string_pos_n(STR("long long string so no SSO"), 20, 10, STR("no SSO")); + test_string_pos_n(STR("long long string so no SSO"), 26, 10, STR("")); + test_string_pos_n(STR("long long string so no SSO"), 27, 10, should_throw_exception); + + test_string_pos_n_alloc(STR(""), 0, 0, alloc, STR("")); + test_string_pos_n_alloc(STR(""), 0, 1, alloc, STR("")); + test_string_pos_n_alloc(STR(""), 1, 0, alloc, should_throw_exception); + test_string_pos_n_alloc(STR(""), 1, 1, alloc, should_throw_exception); + test_string_pos_n_alloc(STR("Banane"), 1, 10, alloc, STR("anane")); + test_string_pos_n_alloc(STR("Banane"), 6, 0, alloc, STR("")); + test_string_pos_n_alloc(STR("Banane"), 6, 5, alloc, STR("")); + test_string_pos_n_alloc(STR("Banane"), 7, 10, alloc, should_throw_exception); + test_string_pos_n_alloc(STR("long long string so no SSO"), 0, 10, alloc, STR("long long ")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 10, 8, alloc, STR("string s")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 20, 10, alloc, STR("no SSO")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 26, 10, alloc, STR("")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 27, 10, alloc, should_throw_exception); +} + +template +constexpr void test_allocators() { + test_string>>(std::allocator{}); + test_string>>(min_allocator{}); + test_string>>(test_allocator{42}); +} + +template +constexpr bool test_char_traits() { + test_allocators>(); + test_allocators>(); + + return true; +} + +int main(int, char**) { + // TODO: put these into a single function when we increase the constexpr step limit + test_char_traits(); + static_assert(test_char_traits()); + test_char_traits(); + static_assert(test_char_traits()); + 
test_char_traits(); + static_assert(test_char_traits()); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test_char_traits(); + static_assert(test_char_traits()); +#endif +#ifndef TEST_HAS_NO_CHAR8_T + test_char_traits(); + static_assert(test_char_traits()); +#endif + + return 0; +} diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp index 4ae469d597a0f..7f6404abd8261 100644 --- a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp @@ -8,7 +8,8 @@ // -// basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr since C++20 +// basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr since C++20, removed in C++23 +// basic_string substr(size_type pos = 0, size_type n = npos) const&; // since in C++23 #include #include @@ -47,130 +48,72 @@ test(const S& s, typename S::size_type pos, typename S::size_type n) #endif } +template +TEST_CONSTEXPR_CXX20 void test_string() { + test(S(""), 0, 0); + test(S(""), 1, 0); + test(S("pniot"), 0, 0); + test(S("htaob"), 0, 1); + test(S("fodgq"), 0, 2); + test(S("hpqia"), 0, 4); + test(S("qanej"), 0, 5); + test(S("dfkap"), 1, 0); + test(S("clbao"), 1, 1); + test(S("ihqrf"), 1, 2); + test(S("mekdn"), 1, 3); + test(S("ngtjf"), 1, 4); + test(S("srdfq"), 2, 0); + test(S("qkdrs"), 2, 1); + test(S("ikcrq"), 2, 2); + test(S("cdaih"), 2, 3); + test(S("dmajb"), 4, 0); + test(S("karth"), 4, 1); + test(S("lhcdo"), 5, 0); + test(S("acbsj"), 6, 0); + test(S("pbsjikaole"), 0, 0); + test(S("pcbahntsje"), 0, 1); + test(S("mprdjbeiak"), 0, 5); + test(S("fhepcrntko"), 0, 9); + test(S("eqmpaidtls"), 0, 10); + test(S("joidhalcmq"), 1, 0); + test(S("omigsphflj"), 1, 1); + test(S("kocgbphfji"), 1, 4); + test(S("onmjekafbi"), 1, 8); + test(S("fbslrjiqkm"), 1, 9); + test(S("oqmrjahnkg"), 5, 0); + 
test(S("jeidpcmalh"), 5, 1); + test(S("schfalibje"), 5, 2); + test(S("crliponbqe"), 5, 4); + test(S("igdscopqtm"), 5, 5); + test(S("qngpdkimlc"), 9, 0); + test(S("thdjgafrlb"), 9, 1); + test(S("hcjitbfapl"), 10, 0); + test(S("mgojkldsqh"), 11, 0); + test(S("gfshlcmdjreqipbontak"), 0, 0); + test(S("nadkhpfemgclosibtjrq"), 0, 1); + test(S("nkodajteqplrbifhmcgs"), 0, 10); + test(S("ofdrqmkeblthacpgijsn"), 0, 19); + test(S("gbmetiprqdoasckjfhln"), 0, 20); + test(S("bdfjqgatlksriohemnpc"), 1, 0); + test(S("crnklpmegdqfiashtojb"), 1, 1); + test(S("ejqcnahdrkfsmptilgbo"), 1, 9); + test(S("jsbtafedocnirgpmkhql"), 1, 18); + test(S("prqgnlbaejsmkhdctoif"), 1, 19); + test(S("qnmodrtkebhpasifgcjl"), 10, 0); + test(S("pejafmnokrqhtisbcdgl"), 10, 1); + test(S("cpebqsfmnjdolhkratgi"), 10, 5); + test(S("odnqkgijrhabfmcestlp"), 10, 9); + test(S("lmofqdhpkibagnrcjste"), 10, 10); + test(S("lgjqketopbfahrmnsicd"), 19, 0); + test(S("ktsrmnqagdecfhijpobl"), 19, 1); + test(S("lsaijeqhtrbgcdmpfkno"), 20, 0); + test(S("dplqartnfgejichmoskb"), 21, 0); +} + TEST_CONSTEXPR_CXX20 bool test() { - { - typedef std::string S; - test(S(""), 0, 0); - test(S(""), 1, 0); - test(S("pniot"), 0, 0); - test(S("htaob"), 0, 1); - test(S("fodgq"), 0, 2); - test(S("hpqia"), 0, 4); - test(S("qanej"), 0, 5); - test(S("dfkap"), 1, 0); - test(S("clbao"), 1, 1); - test(S("ihqrf"), 1, 2); - test(S("mekdn"), 1, 3); - test(S("ngtjf"), 1, 4); - test(S("srdfq"), 2, 0); - test(S("qkdrs"), 2, 1); - test(S("ikcrq"), 2, 2); - test(S("cdaih"), 2, 3); - test(S("dmajb"), 4, 0); - test(S("karth"), 4, 1); - test(S("lhcdo"), 5, 0); - test(S("acbsj"), 6, 0); - test(S("pbsjikaole"), 0, 0); - test(S("pcbahntsje"), 0, 1); - test(S("mprdjbeiak"), 0, 5); - test(S("fhepcrntko"), 0, 9); - test(S("eqmpaidtls"), 0, 10); - test(S("joidhalcmq"), 1, 0); - test(S("omigsphflj"), 1, 1); - test(S("kocgbphfji"), 1, 4); - test(S("onmjekafbi"), 1, 8); - test(S("fbslrjiqkm"), 1, 9); - test(S("oqmrjahnkg"), 5, 0); - test(S("jeidpcmalh"), 5, 1); - 
test(S("schfalibje"), 5, 2); - test(S("crliponbqe"), 5, 4); - test(S("igdscopqtm"), 5, 5); - test(S("qngpdkimlc"), 9, 0); - test(S("thdjgafrlb"), 9, 1); - test(S("hcjitbfapl"), 10, 0); - test(S("mgojkldsqh"), 11, 0); - test(S("gfshlcmdjreqipbontak"), 0, 0); - test(S("nadkhpfemgclosibtjrq"), 0, 1); - test(S("nkodajteqplrbifhmcgs"), 0, 10); - test(S("ofdrqmkeblthacpgijsn"), 0, 19); - test(S("gbmetiprqdoasckjfhln"), 0, 20); - test(S("bdfjqgatlksriohemnpc"), 1, 0); - test(S("crnklpmegdqfiashtojb"), 1, 1); - test(S("ejqcnahdrkfsmptilgbo"), 1, 9); - test(S("jsbtafedocnirgpmkhql"), 1, 18); - test(S("prqgnlbaejsmkhdctoif"), 1, 19); - test(S("qnmodrtkebhpasifgcjl"), 10, 0); - test(S("pejafmnokrqhtisbcdgl"), 10, 1); - test(S("cpebqsfmnjdolhkratgi"), 10, 5); - test(S("odnqkgijrhabfmcestlp"), 10, 9); - test(S("lmofqdhpkibagnrcjste"), 10, 10); - test(S("lgjqketopbfahrmnsicd"), 19, 0); - test(S("ktsrmnqagdecfhijpobl"), 19, 1); - test(S("lsaijeqhtrbgcdmpfkno"), 20, 0); - test(S("dplqartnfgejichmoskb"), 21, 0); - } + test_string(); #if TEST_STD_VER >= 11 - { - typedef std::basic_string, min_allocator> S; - test(S(""), 0, 0); - test(S(""), 1, 0); - test(S("pniot"), 0, 0); - test(S("htaob"), 0, 1); - test(S("fodgq"), 0, 2); - test(S("hpqia"), 0, 4); - test(S("qanej"), 0, 5); - test(S("dfkap"), 1, 0); - test(S("clbao"), 1, 1); - test(S("ihqrf"), 1, 2); - test(S("mekdn"), 1, 3); - test(S("ngtjf"), 1, 4); - test(S("srdfq"), 2, 0); - test(S("qkdrs"), 2, 1); - test(S("ikcrq"), 2, 2); - test(S("cdaih"), 2, 3); - test(S("dmajb"), 4, 0); - test(S("karth"), 4, 1); - test(S("lhcdo"), 5, 0); - test(S("acbsj"), 6, 0); - test(S("pbsjikaole"), 0, 0); - test(S("pcbahntsje"), 0, 1); - test(S("mprdjbeiak"), 0, 5); - test(S("fhepcrntko"), 0, 9); - test(S("eqmpaidtls"), 0, 10); - test(S("joidhalcmq"), 1, 0); - test(S("omigsphflj"), 1, 1); - test(S("kocgbphfji"), 1, 4); - test(S("onmjekafbi"), 1, 8); - test(S("fbslrjiqkm"), 1, 9); - test(S("oqmrjahnkg"), 5, 0); - test(S("jeidpcmalh"), 5, 1); - 
test(S("schfalibje"), 5, 2); - test(S("crliponbqe"), 5, 4); - test(S("igdscopqtm"), 5, 5); - test(S("qngpdkimlc"), 9, 0); - test(S("thdjgafrlb"), 9, 1); - test(S("hcjitbfapl"), 10, 0); - test(S("mgojkldsqh"), 11, 0); - test(S("gfshlcmdjreqipbontak"), 0, 0); - test(S("nadkhpfemgclosibtjrq"), 0, 1); - test(S("nkodajteqplrbifhmcgs"), 0, 10); - test(S("ofdrqmkeblthacpgijsn"), 0, 19); - test(S("gbmetiprqdoasckjfhln"), 0, 20); - test(S("bdfjqgatlksriohemnpc"), 1, 0); - test(S("crnklpmegdqfiashtojb"), 1, 1); - test(S("ejqcnahdrkfsmptilgbo"), 1, 9); - test(S("jsbtafedocnirgpmkhql"), 1, 18); - test(S("prqgnlbaejsmkhdctoif"), 1, 19); - test(S("qnmodrtkebhpasifgcjl"), 10, 0); - test(S("pejafmnokrqhtisbcdgl"), 10, 1); - test(S("cpebqsfmnjdolhkratgi"), 10, 5); - test(S("odnqkgijrhabfmcestlp"), 10, 9); - test(S("lmofqdhpkibagnrcjste"), 10, 10); - test(S("lgjqketopbfahrmnsicd"), 19, 0); - test(S("ktsrmnqagdecfhijpobl"), 19, 1); - test(S("lsaijeqhtrbgcdmpfkno"), 20, 0); - test(S("dplqartnfgejichmoskb"), 21, 0); - } + test_string, min_allocator>>(); #endif return true; diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr_rvalue.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr_rvalue.pass.cpp new file mode 100644 index 0000000000000..13019ae351077 --- /dev/null +++ b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr_rvalue.pass.cpp @@ -0,0 +1,103 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// constexpr basic_string substr(size_type pos = 0, size_type n = npos) &&; + +#include +#include + +#include "constexpr_char_traits.h" +#include "make_string.h" +#include "min_allocator.h" +#include "test_allocator.h" + +#define STR(string) MAKE_CSTRING(typename S::value_type, string) + +constexpr struct should_throw_exception_t { +} should_throw_exception; + +template +constexpr void test(S orig, size_t pos, ptrdiff_t n, S expected) { + S str = std::move(orig).substr(pos, n); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(str.__invariants()); + assert(str == expected); +} + +template +constexpr void test(S orig, size_t pos, ptrdiff_t n, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + S str = std::move(orig).substr(pos, n); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)n; +#endif +} + +template +constexpr void test_string() { + test(STR(""), 0, 0, STR("")); + test(STR(""), 0, 1, STR("")); + test(STR(""), 1, 0, should_throw_exception); + test(STR(""), 1, 1, should_throw_exception); + test(STR("short string"), 0, 1, STR("s")); + test(STR("short string"), 5, 5, STR(" stri")); + test(STR("short string"), 12, 5, STR("")); + test(STR("short string"), 13, 5, should_throw_exception); + test(STR("long long string so no SSO"), 0, 0, STR("")); + test(STR("long long string so no SSO"), 0, 10, STR("long long ")); + test(STR("long long string so no SSO"), 10, 10, STR("string so ")); + test(STR("long long string so no SSO"), 20, 10, STR("no SSO")); + test(STR("long long string so no SSO"), 26, 10, STR("")); + test(STR("long long string so no SSO"), 27, 0, should_throw_exception); +} + +template +constexpr void test_allocators() { + test_string>>(); 
+ test_string>>(); + test_string>>(); +} + +template +constexpr void test_char_traits() { + test_allocators>(); + test_allocators>(); +} + +constexpr bool test() { + test_char_traits(); + test_char_traits(); + test_char_traits(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test_char_traits(); +#endif +#ifndef TEST_HAS_NO_CHAR8_T + test_char_traits(); +#endif + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/support/count_new.h b/libcxx/test/support/count_new.h index aadebe444708f..645062a01446d 100644 --- a/libcxx/test/support/count_new.h +++ b/libcxx/test/support/count_new.h @@ -472,6 +472,40 @@ struct DisableAllocationGuard { DisableAllocationGuard& operator=(DisableAllocationGuard const&); }; +#if TEST_STD_VER >= 20 + +struct ConstexprDisableAllocationGuard { + TEST_CONSTEXPR_CXX14 explicit ConstexprDisableAllocationGuard(bool disable = true) : m_disabled(disable) + { + if (!TEST_IS_CONSTANT_EVALUATED) { + // Don't re-disable if already disabled. 
+ if (globalMemCounter.disable_allocations == true) m_disabled = false; + if (m_disabled) globalMemCounter.disableAllocations(); + } else { + m_disabled = false; + } + } + + TEST_CONSTEXPR_CXX14 void release() { + if (!TEST_IS_CONSTANT_EVALUATED) { + if (m_disabled) globalMemCounter.enableAllocations(); + m_disabled = false; + } + } + + TEST_CONSTEXPR_CXX20 ~ConstexprDisableAllocationGuard() { + release(); + } + +private: + bool m_disabled; + + ConstexprDisableAllocationGuard(ConstexprDisableAllocationGuard const&); + ConstexprDisableAllocationGuard& operator=(ConstexprDisableAllocationGuard const&); +}; + +#endif + struct RequireAllocationGuard { explicit RequireAllocationGuard(std::size_t RequireAtLeast = 1) : m_req_alloc(RequireAtLeast), diff --git a/libcxx/test/support/make_string.h b/libcxx/test/support/make_string.h index 00c2a48e3d004..728b6540abe07 100644 --- a/libcxx/test/support/make_string.h +++ b/libcxx/test/support/make_string.h @@ -89,7 +89,7 @@ struct MultiStringType { // This helper is used in unit tests to make them generic. The input should be // valid ASCII which means the input is also valid UTF-8. #define MAKE_CSTRING(CharT, Str) \ - MKSTR(Str).as_ptr((const CharT*)0) + MKSTR(Str).as_ptr(static_cast(nullptr)) // Like MAKE_CSTRING but makes a basic_string. Embedded nulls are OK. #define MAKE_STRING(CharT, Str) \ From 35c9085121816abc2108cb6f1754b39bfdc3085f Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Wed, 2 Nov 2022 11:42:46 -0700 Subject: [PATCH 069/516] [mlir][llvmir] Support FastmathFlags for LLVM intrinsic operations. This is required for D126305 code to propagate fastmath attributes for Arith operations that are converted to LLVM IR intrinsics operations. LLVM IR intrinsic operations are using custom assembly format now to avoid printing {fastmathFlags = #llvm.fastmath}, which is too verbose. 
Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D136225 --- flang/test/Intrinsics/math-codegen.fir | 108 +++++++++--------- flang/test/Lower/Intrinsics/anint.f90 | 6 +- flang/test/Lower/math-lowering.f90 | 12 +- .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 51 +++++++-- mlir/lib/Dialect/LLVMIR/CMakeLists.txt | 1 - mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 3 + .../Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp | 7 -- .../Conversion/ArithToLLVM/arith-to-llvm.mlir | 12 +- .../ComplexToLLVM/convert-to-llvm.mlir | 2 +- .../ComplexToLLVM/full-conversion.mlir | 2 +- .../ComplexToStandard/full-conversion.mlir | 2 +- .../Conversion/FuncToLLVM/func-to-llvm.mlir | 8 +- .../Conversion/MathToLLVM/math-to-llvm.mlir | 36 +++--- .../SPIRVToLLVM/bitwise-ops-to-llvm.mlir | 8 +- .../SPIRVToLLVM/gl-ops-to-llvm.mlir | 56 ++++----- .../VectorToLLVM/vector-to-llvm.mlir | 14 +-- .../Dialect/LLVMIR/optimize-for-nvvm.mlir | 4 +- mlir/test/Dialect/LLVMIR/roundtrip.mlir | 29 +++-- mlir/test/Dialect/OpenMP/ops.mlir | 4 +- mlir/test/Target/LLVMIR/Import/intrinsic.ll | 96 ++++++++-------- mlir/test/Target/LLVMIR/llvmir.mlir | 11 ++ 21 files changed, 255 insertions(+), 217 deletions(-) delete mode 100644 mlir/lib/Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp diff --git a/flang/test/Intrinsics/math-codegen.fir b/flang/test/Intrinsics/math-codegen.fir index 2c658d4c1b4d6..0af896adf3226 100644 --- a/flang/test/Intrinsics/math-codegen.fir +++ b/flang/test/Intrinsics/math-codegen.fir @@ -4,13 +4,13 @@ //--- abs_fast.fir // RUN: fir-opt %t/abs_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/abs_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = 
llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 // CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f128) -> f128 // CHECK: @_QPtest_complex4 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 @@ -76,13 +76,13 @@ func.func private @hypot(f64, f64) -> f64 //--- abs_relaxed.fir // RUN: fir-opt %t/abs_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/abs_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 // CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f128) -> f128 // CHECK: @_QPtest_complex4 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 @@ -304,15 +304,15 @@ func.func private @llvm.trunc.f64(f64) -> f64 //--- anint_fast.fir // RUN: fir-opt %t/anint_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/anint_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = 
fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} %1 = fir.load %arg0 : !fir.ref - %2 = "llvm.intr.round"(%1) : (f32) -> f32 + %2 = llvm.intr.round(%1) : (f32) -> f32 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f32 @@ -320,7 +320,7 @@ func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { %0 = fir.alloca f64 {bindc_name = "test_real8", uniq_name = "_QFtest_real8Etest_real8"} %1 = fir.load %arg0 : !fir.ref - %2 = "llvm.intr.round"(%1) : (f64) -> f64 + %2 = llvm.intr.round(%1) : (f64) -> f64 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f64 @@ -329,15 +329,15 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- anint_relaxed.fir // RUN: fir-opt %t/anint_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/anint_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} %1 = fir.load %arg0 : !fir.ref - %2 = "llvm.intr.round"(%1) : (f32) -> f32 + %2 = llvm.intr.round(%1) : (f32) -> f32 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f32 @@ -345,7 +345,7 @@ func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { %0 = fir.alloca f64 {bindc_name = "test_real8", uniq_name = "_QFtest_real8Etest_real8"} %1 = fir.load %arg0 : !fir.ref - 
%2 = "llvm.intr.round"(%1) : (f64) -> f64 + %2 = llvm.intr.round(%1) : (f64) -> f64 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f64 @@ -541,10 +541,10 @@ func.func private @atan2(f64, f64) -> f64 //--- ceiling_fast.fir // RUN: fir-opt %t/ceiling_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/ceiling_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -570,10 +570,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- ceiling_relaxed.fir // RUN: fir-opt %t/ceiling_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/ceiling_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -630,10 +630,10 @@ func.func private @ceil(f64) -> f64 //--- cos_fast.fir // RUN: fir-opt %t/cos_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/cos_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : 
(f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -655,10 +655,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- cos_relaxed.fir // RUN: fir-opt %t/cos_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/cos_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -865,10 +865,10 @@ func.func private @erf(f64) -> f64 //--- exp_fast.fir // RUN: fir-opt %t/exp_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/exp_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -890,10 +890,10 @@ 
func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- exp_relaxed.fir // RUN: fir-opt %t/exp_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/exp_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -942,10 +942,10 @@ func.func private @exp(f64) -> f64 //--- floor_fast.fir // RUN: fir-opt %t/floor_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/floor_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -971,10 +971,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- floor_relaxed.fir // RUN: fir-opt %t/floor_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/floor_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: 
{{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1031,10 +1031,10 @@ func.func private @floor(f64) -> f64 //--- log_fast.fir // RUN: fir-opt %t/log_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1056,10 +1056,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- log_relaxed.fir // RUN: fir-opt %t/log_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1108,10 +1108,10 @@ func.func private @log(f64) -> f64 //--- log10_fast.fir // RUN: fir-opt %t/log10_fast.fir 
--fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log10_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1133,10 +1133,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- log10_relaxed.fir // RUN: fir-opt %t/log10_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log10_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1310,13 +1310,13 @@ func.func private @llvm.lround.i64.f64(f64) -> i64 // CHECK: @_QPtest_real4 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call 
@llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 // CHECK: @_QPtest_real8 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref {fir.bindc_name = "s"}, %arg3: !fir.ref {fir.bindc_name = "i"}) -> f32 { @@ -1363,13 +1363,13 @@ func.func private @llvm.powi.f64.i32(f64, i32) -> f64 // CHECK: @_QPtest_real4 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 // CHECK: @_QPtest_real8 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, 
{{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref {fir.bindc_name = "s"}, %arg3: !fir.ref {fir.bindc_name = "i"}) -> f32 { @@ -1482,16 +1482,16 @@ func.func @_QPtest_int4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir //--- sign_fast.fir // RUN: fir-opt %t/sign_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sign_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: @_QPtest_real10 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 // CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1533,16 +1533,16 @@ func.func @_QPtest_real16(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: ! 
//--- sign_relaxed.fir // RUN: fir-opt %t/sign_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sign_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: @_QPtest_real10 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 // CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1639,10 +1639,10 @@ func.func private @llvm.copysign.f128(f128, f128) -> f128 //--- sin_fast.fir // RUN: fir-opt %t/sin_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sin_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func 
@_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1664,10 +1664,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- sin_relaxed.fir // RUN: fir-opt %t/sin_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sin_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} diff --git a/flang/test/Lower/Intrinsics/anint.f90 b/flang/test/Lower/Intrinsics/anint.f90 index 03b376e2605c7..fe479a07681ee 100644 --- a/flang/test/Lower/Intrinsics/anint.f90 +++ b/flang/test/Lower/Intrinsics/anint.f90 @@ -4,7 +4,7 @@ ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: %[[VAL_3:.*]] = "llvm.intr.round"(%[[VAL_2]]) : (f32) -> f32 +! CHECK: %[[VAL_3:.*]] = llvm.intr.round(%[[VAL_2]]) : (f32) -> f32 ! CHECK: fir.store %[[VAL_3]] to %[[VAL_1]] : !fir.ref ! CHECK: return ! CHECK: } @@ -18,7 +18,7 @@ subroutine anint_test(a, b) ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: %[[VAL_3:.*]] = "llvm.intr.round"(%[[VAL_2]]) : (f64) -> f64 +! CHECK: %[[VAL_3:.*]] = llvm.intr.round(%[[VAL_2]]) : (f64) -> f64 ! 
CHECK: fir.store %[[VAL_3]] to %[[VAL_1]] : !fir.ref ! CHECK: return ! CHECK: } @@ -32,7 +32,7 @@ subroutine anint_test_real8(a, b) ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: %[[VAL_3:.*]] = "llvm.intr.round"(%[[VAL_2]]) : (f80) -> f80 +! CHECK: %[[VAL_3:.*]] = llvm.intr.round(%[[VAL_2]]) : (f80) -> f80 ! CHECK: fir.store %[[VAL_3]] to %[[VAL_1]] : !fir.ref ! CHECK: return ! CHECK: } diff --git a/flang/test/Lower/math-lowering.f90 b/flang/test/Lower/math-lowering.f90 index 7d9bf7c0c2a86..82dfaf4fc7301 100644 --- a/flang/test/Lower/math-lowering.f90 +++ b/flang/test/Lower/math-lowering.f90 @@ -109,8 +109,8 @@ function test_real4(x) end function ! ALL-LABEL: @_QPtest_real4 -! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 -! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 ! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 function test_real8(x) @@ -119,8 +119,8 @@ function test_real8(x) end function ! ALL-LABEL: @_QPtest_real8 -! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 -! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! FAST: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 ! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 function test_real10(x) @@ -129,8 +129,8 @@ function test_real10(x) end function ! ALL-LABEL: @_QPtest_real10 -! 
FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f80) -> f80 -! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f80) -> f80 +! FAST: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f80) -> f80 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f80) -> f80 ! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f80({{%[A-Za-z0-9._]+}}) : (f80) -> f80 ! TODO: wait until fp128 is supported well in llvm.round diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index ca902f9d848cf..b2257a163932c 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -2,6 +2,7 @@ #define LLVM_INTRINSIC_OPS include "mlir/IR/OpBase.td" +include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "mlir/Interfaces/InferTypeOpInterface.td" @@ -12,38 +13,59 @@ include "mlir/Interfaces/InferTypeOpInterface.td" // "intr." to avoid potential name clashes. 
class LLVM_UnaryIntrOpBase traits = []> : + list traits = [], + dag addAttrs = (ins)> : LLVM_OneResultIntrOp { - let arguments = (ins LLVM_ScalarOrVectorOf:$in); + dag args = (ins LLVM_ScalarOrVectorOf:$in); + let arguments = !con(args, addAttrs); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } class LLVM_UnaryIntrOpI traits = []> : LLVM_UnaryIntrOpBase; class LLVM_UnaryIntrOpF traits = []> : - LLVM_UnaryIntrOpBase; + LLVM_UnaryIntrOpBase], + traits), + (ins DefaultValuedAttr:$fastmathFlags)>; class LLVM_BinarySameArgsIntrOpBase traits = []> : + list traits = [], + dag addAttrs = (ins)> : LLVM_OneResultIntrOp { - let arguments = (ins LLVM_ScalarOrVectorOf:$a, - LLVM_ScalarOrVectorOf:$b); + dag args = (ins LLVM_ScalarOrVectorOf:$a, + LLVM_ScalarOrVectorOf:$b); + let arguments = !con(args, addAttrs); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } class LLVM_BinarySameArgsIntrOpI traits = []> : LLVM_BinarySameArgsIntrOpBase; class LLVM_BinarySameArgsIntrOpF traits = []> : - LLVM_BinarySameArgsIntrOpBase; + LLVM_BinarySameArgsIntrOpBase], + traits), + (ins DefaultValuedAttr:$fastmathFlags)>; class LLVM_TernarySameArgsIntrOpF traits = []> : LLVM_OneResultIntrOp { + !listconcat([DeclareOpInterfaceMethods, + Pure, SameOperandsAndResultType], traits)> { let arguments = (ins LLVM_ScalarOrVectorOf:$a, LLVM_ScalarOrVectorOf:$b, - LLVM_ScalarOrVectorOf:$c); + LLVM_ScalarOrVectorOf:$c, + DefaultValuedAttr:$fastmathFlags); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } class LLVM_CountZerosIntrOp traits = []> : @@ -83,9 +105,14 @@ def LLVM_RoundOp : LLVM_UnaryIntrOpF<"round">; def LLVM_FTruncOp : LLVM_UnaryIntrOpF<"trunc">; def LLVM_SqrtOp : LLVM_UnaryIntrOpF<"sqrt">; def LLVM_PowOp : LLVM_BinarySameArgsIntrOpF<"pow">; -def LLVM_PowIOp : LLVM_OneResultIntrOp<"powi"> { - let arguments = (ins 
LLVM_ScalarOrVectorOf:$val, - AnySignlessInteger:$power); +def LLVM_PowIOp : LLVM_OneResultIntrOp<"powi", [], [0,1], + [DeclareOpInterfaceMethods, Pure]> { + let arguments = + (ins LLVM_ScalarOrVectorOf:$val, + AnySignlessInteger:$power, + DefaultValuedAttr:$fastmathFlags); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } def LLVM_BitReverseOp : LLVM_UnaryIntrOpI<"bitreverse">; def LLVM_CountLeadingZerosOp : LLVM_CountZerosIntrOp<"ctlz">; diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index b3f13edb07eee..9515b2c9990af 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -4,7 +4,6 @@ add_mlir_dialect_library(MLIRLLVMDialect IR/FunctionCallUtils.cpp IR/LLVMAttrs.cpp IR/LLVMDialect.cpp - IR/LLVMIntrinsicOps.cpp IR/LLVMTypes.cpp IR/LLVMTypeSyntax.cpp diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 375ec6ff0e78a..c2b09619a5f52 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -2612,6 +2612,9 @@ void LLVMDialect::initialize() { #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/LLVMOps.cpp.inc" +#define GET_OP_CLASSES +#include "mlir/Dialect/LLVMIR/LLVMIntrinsicOps.cpp.inc" + LogicalResult LLVMDialect::verifyDataLayoutString( StringRef descr, llvm::function_ref reportError) { llvm::Expected maybeDataLayout = diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp deleted file mode 100644 index a5d85a7cf4ccb..0000000000000 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" - -using namespace mlir; -using namespace mlir::LLVM; - -#define GET_OP_CLASSES -#include "mlir/Dialect/LLVMIR/LLVMIntrinsicOps.cpp.inc" diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir 
b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index 81f402195fb4f..f2ef7081e2c6f 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -429,22 +429,22 @@ func.func @select(%arg0 : i1, %arg1 : i32, %arg2 : i32) -> i32 { // CHECK-LABEL: @minmaxi func.func @minmaxi(%arg0 : i32, %arg1 : i32) -> i32 { - // CHECK: = "llvm.intr.smin"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.smin(%arg0, %arg1) : (i32, i32) -> i32 %0 = arith.minsi %arg0, %arg1 : i32 - // CHECK: = "llvm.intr.smax"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.smax(%arg0, %arg1) : (i32, i32) -> i32 %1 = arith.maxsi %arg0, %arg1 : i32 - // CHECK: = "llvm.intr.umin"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.umin(%arg0, %arg1) : (i32, i32) -> i32 %2 = arith.minui %arg0, %arg1 : i32 - // CHECK: = "llvm.intr.umax"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.umax(%arg0, %arg1) : (i32, i32) -> i32 %3 = arith.maxui %arg0, %arg1 : i32 return %0 : i32 } // CHECK-LABEL: @minmaxf func.func @minmaxf(%arg0 : f32, %arg1 : f32) -> f32 { - // CHECK: = "llvm.intr.minnum"(%arg0, %arg1) : (f32, f32) -> f32 + // CHECK: = llvm.intr.minnum(%arg0, %arg1) : (f32, f32) -> f32 %0 = arith.minf %arg0, %arg1 : f32 - // CHECK: = "llvm.intr.maxnum"(%arg0, %arg1) : (f32, f32) -> f32 + // CHECK: = llvm.intr.maxnum(%arg0, %arg1) : (f32, f32) -> f32 %1 = arith.maxf %arg0, %arg1 : f32 return %0 : f32 } diff --git a/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir index 32409ca242c28..be46c22155a6c 100644 --- a/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir +++ b/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir @@ -147,6 +147,6 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK-DAG: %[[REAL_SQ:.*]] = llvm.fmul %[[REAL]], %[[REAL]] : f32 // CHECK-DAG: %[[IMAG_SQ:.*]] = llvm.fmul %[[IMAG]], %[[IMAG]] : f32 // CHECK: 
%[[SQ_NORM:.*]] = llvm.fadd %[[REAL_SQ]], %[[IMAG_SQ]] : f32 -// CHECK: %[[NORM:.*]] = "llvm.intr.sqrt"(%[[SQ_NORM]]) : (f32) -> f32 +// CHECK: %[[NORM:.*]] = llvm.intr.sqrt(%[[SQ_NORM]]) : (f32) -> f32 // CHECK: return %[[NORM]] : f32 diff --git a/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir b/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir index d8bf45d752669..b7756b3be543f 100644 --- a/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir +++ b/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir @@ -66,6 +66,6 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK-DAG: %[[REAL_SQ:.*]] = llvm.fmul %[[REAL]], %[[REAL]] : f32 // CHECK-DAG: %[[IMAG_SQ:.*]] = llvm.fmul %[[IMAG]], %[[IMAG]] : f32 // CHECK: %[[SQ_NORM:.*]] = llvm.fadd %[[REAL_SQ]], %[[IMAG_SQ]] : f32 -// CHECK: %[[NORM:.*]] = "llvm.intr.sqrt"(%[[SQ_NORM]]) : (f32) -> f32 +// CHECK: %[[NORM:.*]] = llvm.intr.sqrt(%[[SQ_NORM]]) : (f32) -> f32 // CHECK: llvm.return %[[NORM]] : f32 diff --git a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir index 5ff58240358e7..f6d023b92aab9 100644 --- a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir +++ b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir @@ -11,7 +11,7 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK-DAG: %[[REAL_SQ:.*]] = llvm.fmul %[[REAL]], %[[REAL]] : f32 // CHECK-DAG: %[[IMAG_SQ:.*]] = llvm.fmul %[[IMAG]], %[[IMAG]] : f32 // CHECK: %[[SQ_NORM:.*]] = llvm.fadd %[[REAL_SQ]], %[[IMAG_SQ]] : f32 -// CHECK: %[[NORM:.*]] = "llvm.intr.sqrt"(%[[SQ_NORM]]) : (f32) -> f32 +// CHECK: %[[NORM:.*]] = llvm.intr.sqrt(%[[SQ_NORM]]) : (f32) -> f32 // CHECK: llvm.return %[[NORM]] : f32 // CHECK-LABEL: llvm.func @complex_eq diff --git a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir index 7f4396b7b6d79..481067cf8d915 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir 
+++ b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir @@ -452,7 +452,7 @@ func.func @dfs_block_order(%arg0: i32) -> (i32) { // CHECK-LABEL: func @ceilf( // CHECK-SAME: f32 func.func @ceilf(%arg0 : f32) { - // CHECK: "llvm.intr.ceil"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.ceil(%arg0) : (f32) -> f32 %0 = math.ceil %arg0 : f32 func.return } @@ -462,7 +462,7 @@ func.func @ceilf(%arg0 : f32) { // CHECK-LABEL: func @floorf( // CHECK-SAME: f32 func.func @floorf(%arg0 : f32) { - // CHECK: "llvm.intr.floor"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.floor(%arg0) : (f32) -> f32 %0 = math.floor %arg0 : f32 func.return } @@ -503,9 +503,9 @@ func.func private @zero_result_func() // CHECK-SAME: %[[ARG0:.*]]: f32 // CHECK-SAME: %[[ARG1:.*]]: vector<4xf32> func.func @fmaf(%arg0: f32, %arg1: vector<4xf32>) { - // CHECK: %[[S:.*]] = "llvm.intr.fma"(%[[ARG0]], %[[ARG0]], %[[ARG0]]) : (f32, f32, f32) -> f32 + // CHECK: %[[S:.*]] = llvm.intr.fma(%[[ARG0]], %[[ARG0]], %[[ARG0]]) : (f32, f32, f32) -> f32 %0 = math.fma %arg0, %arg0, %arg0 : f32 - // CHECK: %[[V:.*]] = "llvm.intr.fma"(%[[ARG1]], %[[ARG1]], %[[ARG1]]) : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> + // CHECK: %[[V:.*]] = llvm.intr.fma(%[[ARG1]], %[[ARG1]], %[[ARG1]]) : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> %1 = math.fma %arg1, %arg1, %arg1 : vector<4xf32> func.return } diff --git a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir index 415ba1d9f001c..7f4b9634de3ba 100644 --- a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir +++ b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir @@ -2,13 +2,13 @@ // CHECK-LABEL: @ops func.func @ops(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64) { - // CHECK: = "llvm.intr.exp"(%{{.*}}) : (f32) -> f32 + // CHECK: = llvm.intr.exp(%{{.*}}) : (f32) -> f32 %0 = math.exp %arg0 : f32 - // CHECK: = "llvm.intr.exp2"(%{{.*}}) : (f32) -> f32 + // CHECK: = llvm.intr.exp2(%{{.*}}) : 
(f32) -> f32 %1 = math.exp2 %arg0 : f32 - // CHECK: = "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + // CHECK: = llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 %2 = math.sqrt %arg0 : f32 - // CHECK: = "llvm.intr.sqrt"(%{{.*}}) : (f64) -> f64 + // CHECK: = llvm.intr.sqrt(%{{.*}}) : (f64) -> f64 %3 = math.sqrt %arg4 : f64 func.return } @@ -29,7 +29,7 @@ func.func @absi(%arg0: i32) -> i32 { func.func @log1p(%arg0 : f32) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %arg0 : f32 - // CHECK: %[[LOG:.*]] = "llvm.intr.log"(%[[ADD]]) : (f32) -> f32 + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) : (f32) -> f32 %0 = math.log1p %arg0 : f32 func.return } @@ -41,7 +41,7 @@ func.func @log1p_2dvector(%arg0 : vector<4x3xf32>) { // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : vector<3xf32> // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %[[EXTRACT]] : vector<3xf32> - // CHECK: %[[LOG:.*]] = "llvm.intr.log"(%[[ADD]]) : (vector<3xf32>) -> vector<3xf32> + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) : (vector<3xf32>) -> vector<3xf32> // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[LOG]], %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> %0 = math.log1p %arg0 : vector<4x3xf32> func.return @@ -53,7 +53,7 @@ func.func @log1p_2dvector(%arg0 : vector<4x3xf32>) { // CHECK-SAME: f32 func.func @expm1(%arg0 : f32) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 - // CHECK: %[[EXP:.*]] = "llvm.intr.exp"(%arg0) : (f32) -> f32 + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) : (f32) -> f32 // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : f32 %0 = math.expm1 %arg0 : f32 func.return @@ -65,7 +65,7 @@ func.func @expm1(%arg0 : f32) { // CHECK-SAME: f32 func.func @rsqrt(%arg0 : f32) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) 
: (f32) -> f32 + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) : (f32) -> f32 // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : f32 %0 = math.rsqrt %arg0 : f32 func.return @@ -76,7 +76,7 @@ func.func @rsqrt(%arg0 : f32) { // CHECK-LABEL: func @sine( // CHECK-SAME: f32 func.func @sine(%arg0 : f32) { - // CHECK: "llvm.intr.sin"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.sin(%arg0) : (f32) -> f32 %0 = math.sin %arg0 : f32 func.return } @@ -119,7 +119,7 @@ func.func @cttz_vec(%arg0 : vector<4xi32>) { // CHECK-LABEL: func @ctpop( // CHECK-SAME: i32 func.func @ctpop(%arg0 : i32) { - // CHECK: "llvm.intr.ctpop"(%arg0) : (i32) -> i32 + // CHECK: llvm.intr.ctpop(%arg0) : (i32) -> i32 %0 = math.ctpop %arg0 : i32 func.return } @@ -129,7 +129,7 @@ func.func @ctpop(%arg0 : i32) { // CHECK-LABEL: func @ctpop_vector( // CHECK-SAME: vector<3xi32> func.func @ctpop_vector(%arg0 : vector<3xi32>) { - // CHECK: "llvm.intr.ctpop"(%arg0) : (vector<3xi32>) -> vector<3xi32> + // CHECK: llvm.intr.ctpop(%arg0) : (vector<3xi32>) -> vector<3xi32> %0 = math.ctpop %arg0 : vector<3xi32> func.return } @@ -140,7 +140,7 @@ func.func @ctpop_vector(%arg0 : vector<3xi32>) { // CHECK-SAME: f64 func.func @rsqrt_double(%arg0 : f64) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f64) : f64 - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (f64) -> f64 + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) : (f64) -> f64 // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : f64 %0 = math.rsqrt %arg0 : f64 func.return @@ -152,7 +152,7 @@ func.func @rsqrt_double(%arg0 : f64) { // CHECK-SAME: vector<4xf32> func.func @rsqrt_vector(%arg0 : vector<4xf32>) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) : (vector<4xf32>) -> vector<4xf32> // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<4xf32> %0 = 
math.rsqrt %arg0 : vector<4xf32> func.return @@ -164,7 +164,7 @@ func.func @rsqrt_vector(%arg0 : vector<4xf32>) { func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : vector<3xf32> - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%[[EXTRACT]]) : (vector<3xf32>) -> vector<3xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[EXTRACT]]) : (vector<3xf32>) -> vector<3xf32> // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<3xf32> // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> %0 = math.rsqrt %arg0 : vector<4x3xf32> @@ -176,7 +176,7 @@ func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { // CHECK-LABEL: func @powf( // CHECK-SAME: f64 func.func @powf(%arg0 : f64) { - // CHECK: %[[POWF:.*]] = "llvm.intr.pow"(%arg0, %arg0) : (f64, f64) -> f64 + // CHECK: %[[POWF:.*]] = llvm.intr.pow(%arg0, %arg0) : (f64, f64) -> f64 %0 = math.powf %arg0, %arg0 : f64 func.return } @@ -186,7 +186,7 @@ func.func @powf(%arg0 : f64) { // CHECK-LABEL: func @round( // CHECK-SAME: f32 func.func @round(%arg0 : f32) { - // CHECK: "llvm.intr.round"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.round(%arg0) : (f32) -> f32 %0 = math.round %arg0 : f32 func.return } @@ -196,7 +196,7 @@ func.func @round(%arg0 : f32) { // CHECK-LABEL: func @roundeven( // CHECK-SAME: f32 func.func @roundeven(%arg0 : f32) { - // CHECK: "llvm.intr.roundeven"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.roundeven(%arg0) : (f32) -> f32 %0 = math.roundeven %arg0 : f32 func.return } @@ -206,7 +206,7 @@ func.func @roundeven(%arg0 : f32) { // CHECK-LABEL: func @trunc( // CHECK-SAME: f32 func.func @trunc(%arg0 : f32) { - // CHECK: "llvm.intr.trunc"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.trunc(%arg0) : (f32) -> f32 %0 = math.trunc %arg0 : f32 func.return } diff --git 
a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir index 7c25cf579f0dc..a0afe0dafcaa2 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir @@ -6,14 +6,14 @@ // CHECK-LABEL: @bitcount_scalar spirv.func @bitcount_scalar(%arg0: i16) "None" { - // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (i16) -> i16 + // CHECK: llvm.intr.ctpop(%{{.*}}) : (i16) -> i16 %0 = spirv.BitCount %arg0: i16 spirv.Return } // CHECK-LABEL: @bitcount_vector spirv.func @bitcount_vector(%arg0: vector<3xi32>) "None" { - // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (vector<3xi32>) -> vector<3xi32> + // CHECK: llvm.intr.ctpop(%{{.*}}) : (vector<3xi32>) -> vector<3xi32> %0 = spirv.BitCount %arg0: vector<3xi32> spirv.Return } @@ -24,14 +24,14 @@ spirv.func @bitcount_vector(%arg0: vector<3xi32>) "None" { // CHECK-LABEL: @bitreverse_scalar spirv.func @bitreverse_scalar(%arg0: i64) "None" { - // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (i64) -> i64 + // CHECK: llvm.intr.bitreverse(%{{.*}}) : (i64) -> i64 %0 = spirv.BitReverse %arg0: i64 spirv.Return } // CHECK-LABEL: @bitreverse_vector spirv.func @bitreverse_vector(%arg0: vector<4xi32>) "None" { - // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (vector<4xi32>) -> vector<4xi32> + // CHECK: llvm.intr.bitreverse(%{{.*}}) : (vector<4xi32>) -> vector<4xi32> %0 = spirv.BitReverse %arg0: vector<4xi32> spirv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir index dffa7dd0f392b..e1936e2fd8abe 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir @@ -6,9 +6,9 @@ // CHECK-LABEL: @ceil spirv.func @ceil(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.ceil"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.ceil(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Ceil %arg0 : f32 - // 
CHECK: "llvm.intr.ceil"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.ceil(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Ceil %arg1 : vector<3xf16> spirv.Return } @@ -19,9 +19,9 @@ spirv.func @ceil(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @cos spirv.func @cos(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.cos(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Cos %arg0 : f32 - // CHECK: "llvm.intr.cos"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.cos(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Cos %arg1 : vector<3xf16> spirv.Return } @@ -32,9 +32,9 @@ spirv.func @cos(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @exp spirv.func @exp(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.exp"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Exp %arg0 : f32 - // CHECK: "llvm.intr.exp"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.exp(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Exp %arg1 : vector<3xf16> spirv.Return } @@ -45,9 +45,9 @@ spirv.func @exp(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @fabs spirv.func @fabs(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.fabs"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.FAbs %arg0 : f32 - // CHECK: "llvm.intr.fabs"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.fabs(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.FAbs %arg1 : vector<3xf16> spirv.Return } @@ -58,9 +58,9 @@ spirv.func @fabs(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @floor spirv.func @floor(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.floor"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.floor(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Floor %arg0 : f32 - // CHECK: 
"llvm.intr.floor"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.floor(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Floor %arg1 : vector<3xf16> spirv.Return } @@ -71,9 +71,9 @@ spirv.func @floor(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @fmax spirv.func @fmax(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + // CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %0 = spirv.GL.FMax %arg0, %arg0 : f32 - // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.FMax %arg1, %arg1 : vector<3xf16> spirv.Return } @@ -84,9 +84,9 @@ spirv.func @fmax(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @fmin spirv.func @fmin(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + // CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %0 = spirv.GL.FMin %arg0, %arg0 : f32 - // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.FMin %arg1, %arg1 : vector<3xf16> spirv.Return } @@ -97,9 +97,9 @@ spirv.func @fmin(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @log spirv.func @log(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.log"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Log %arg0 : f32 - // CHECK: "llvm.intr.log"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.log(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Log %arg1 : vector<3xf16> spirv.Return } @@ -110,9 +110,9 @@ spirv.func @log(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @sin spirv.func @sin(%arg0: f32, %arg1: 
vector<3xf16>) "None" { - // CHECK: "llvm.intr.sin"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.sin(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Sin %arg0 : f32 - // CHECK: "llvm.intr.sin"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.sin(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Sin %arg1 : vector<3xf16> spirv.Return } @@ -123,9 +123,9 @@ spirv.func @sin(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @smax spirv.func @smax(%arg0: i16, %arg1: vector<3xi32>) "None" { - // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 + // CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 %0 = spirv.GL.SMax %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> + // CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> %1 = spirv.GL.SMax %arg1, %arg1 : vector<3xi32> spirv.Return } @@ -136,9 +136,9 @@ spirv.func @smax(%arg0: i16, %arg1: vector<3xi32>) "None" { // CHECK-LABEL: @smin spirv.func @smin(%arg0: i16, %arg1: vector<3xi32>) "None" { - // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 + // CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 %0 = spirv.GL.SMin %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> + // CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> %1 = spirv.GL.SMin %arg1, %arg1 : vector<3xi32> spirv.Return } @@ -149,9 +149,9 @@ spirv.func @smin(%arg0: i16, %arg1: vector<3xi32>) "None" { // CHECK-LABEL: @sqrt spirv.func @sqrt(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Sqrt %arg0 : f32 - // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.sqrt(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Sqrt %arg1 : 
vector<3xf16> spirv.Return } @@ -162,8 +162,8 @@ spirv.func @sqrt(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @tan spirv.func @tan(%arg0: f32) "None" { - // CHECK: %[[SIN:.*]] = "llvm.intr.sin"(%{{.*}}) : (f32) -> f32 - // CHECK: %[[COS:.*]] = "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 + // CHECK: %[[SIN:.*]] = llvm.intr.sin(%{{.*}}) : (f32) -> f32 + // CHECK: %[[COS:.*]] = llvm.intr.cos(%{{.*}}) : (f32) -> f32 // CHECK: llvm.fdiv %[[SIN]], %[[COS]] : f32 %0 = spirv.GL.Tan %arg0 : f32 spirv.Return @@ -177,7 +177,7 @@ spirv.func @tan(%arg0: f32) "None" { spirv.func @tanh(%arg0: f32) "None" { // CHECK: %[[TWO:.*]] = llvm.mlir.constant(2.000000e+00 : f32) : f32 // CHECK: %[[X2:.*]] = llvm.fmul %[[TWO]], %{{.*}} : f32 - // CHECK: %[[EXP:.*]] = "llvm.intr.exp"(%[[X2]]) : (f32) -> f32 + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%[[X2]]) : (f32) -> f32 // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // CHECK: %[[T0:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : f32 // CHECK: %[[T1:.*]] = llvm.fadd %[[EXP]], %[[ONE]] : f32 @@ -193,7 +193,7 @@ spirv.func @tanh(%arg0: f32) "None" { // CHECK-LABEL: @inverse_sqrt spirv.func @inverse_sqrt(%arg0: f32) "None" { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 // CHECK: llvm.fdiv %[[ONE]], %[[SQRT]] : f32 %0 = spirv.GL.InverseSqrt %arg0 : f32 spirv.Return diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index ed4d398780e16..0a4732aecf0fc 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -404,14 +404,14 @@ func.func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: v // CHECK: %[[T6Insert:.*]] = llvm.insertelement %[[T5]] // CHECK: %[[T6:.*]] = llvm.shufflevector %[[T6Insert]] // CHECK: 
%[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T9:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> +// CHECK: %[[T9:.*]] = llvm.intr.fmuladd(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> // CHECK: %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<3xf32>> // CHECK: %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32> // CHECK: %[[T14Insert:.*]] = llvm.insertelement %[[T13]] // CHECK: %[[T14:.*]] = llvm.shufflevector %[[T14Insert]] // CHECK: %[[T16:.*]] = llvm.extractvalue %[[T7]][1] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> +// CHECK: %[[T17:.*]] = llvm.intr.fmuladd(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> // CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T11]][1] : !llvm.array<2 x vector<3xf32>> // CHECK: %[[T19:.*]] = builtin.unrealized_conversion_cast %[[T18]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> // CHECK: return %[[T19]] : vector<2x3xf32> @@ -1103,29 +1103,29 @@ func.func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>, %c: vector<1x1x1xf // CHECK-SAME: %[[B:.*]]: vector<2x4xf32> // CHECK-SAME: %[[C:.*]]: vector<1x1x1xf32> // CHECK: %[[BL:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> - // CHECK: "llvm.intr.fmuladd" + // CHECK: llvm.intr.fmuladd // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %0 = vector.fma %a, %a, %a : vector<8xf32> // CHECK: %[[b00:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b01:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> // CHECK: 
%[[b02:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[B0:.*]] = "llvm.intr.fmuladd"(%[[b00]], %[[b01]], %[[b02]]) : + // CHECK: %[[B0:.*]] = llvm.intr.fmuladd(%[[b00]], %[[b01]], %[[b02]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b10:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b11:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b12:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[B1:.*]] = "llvm.intr.fmuladd"(%[[b10]], %[[b11]], %[[b12]]) : + // CHECK: %[[B1:.*]] = llvm.intr.fmuladd(%[[b10]], %[[b11]], %[[b12]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>> %1 = vector.fma %b, %b, %b : vector<2x4xf32> - // CHECK: %[[C0:.*]] = "llvm.intr.fmuladd" + // CHECK: %[[C0:.*]] = llvm.intr.fmuladd // CHECK-SAME: (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32> %2 = vector.fma %c, %c, %c : vector<1x1x1xf32> - // CHECK: %[[D0:.*]] = "llvm.intr.fmuladd" + // CHECK: %[[D0:.*]] = llvm.intr.fmuladd // CHECK-SAME: (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32> %3 = vector.fma %d, %d, %d : vector diff --git a/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir b/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir index e1cfd0c44f89b..b98d2e08b7548 100644 --- a/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir @@ -9,8 +9,8 @@ llvm.func @fdiv_fp16(%arg0 : f16, %arg1 : f16) -> f16 { // CHECK-DAG: %[[rcp:.*]] = nvvm.rcp.approx.ftz.f %[[rhs]] : f32 // CHECK-DAG: %[[approx:.*]] = llvm.fmul %[[lhs]], %[[rcp]] : f32 // CHECK-DAG: %[[neg:.*]] = llvm.fneg %[[rhs]] : f32 - // CHECK-DAG: %[[err:.*]] = "llvm.intr.fma"(%[[approx]], %[[neg]], 
%[[lhs]]) : (f32, f32, f32) -> f32 - // CHECK-DAG: %[[refined:.*]] = "llvm.intr.fma"(%[[err]], %[[rcp]], %[[approx]]) : (f32, f32, f32) -> f32 + // CHECK-DAG: %[[err:.*]] = llvm.intr.fma(%[[approx]], %[[neg]], %[[lhs]]) : (f32, f32, f32) -> f32 + // CHECK-DAG: %[[refined:.*]] = llvm.intr.fma(%[[err]], %[[rcp]], %[[approx]]) : (f32, f32, f32) -> f32 // CHECK-DAG: %[[cast:.*]] = llvm.bitcast %[[approx]] : f32 to i32 // CHECK-DAG: %[[exp:.*]] = llvm.and %[[cast]], %[[mask]] : i32 // CHECK-DAG: %[[is_zero:.*]] = llvm.icmp "eq" %[[exp]], %[[c0]] : i32 diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 34ce89504c530..884a53dd40cd1 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -146,23 +146,23 @@ func.func @ops(%arg0: i32, %arg1: f32, // CHECK: %{{.*}} = llvm.fneg %[[FLOAT]] : f32 %29 = llvm.fneg %arg1 : f32 -// CHECK: "llvm.intr.sin"(%[[FLOAT]]) : (f32) -> f32 - %30 = "llvm.intr.sin"(%arg1) : (f32) -> f32 +// CHECK: llvm.intr.sin(%[[FLOAT]]) : (f32) -> f32 + %30 = llvm.intr.sin(%arg1) : (f32) -> f32 -// CHECK: "llvm.intr.pow"(%[[FLOAT]], %[[FLOAT]]) : (f32, f32) -> f32 - %31 = "llvm.intr.pow"(%arg1, %arg1) : (f32, f32) -> f32 +// CHECK: llvm.intr.pow(%[[FLOAT]], %[[FLOAT]]) : (f32, f32) -> f32 + %31 = llvm.intr.pow(%arg1, %arg1) : (f32, f32) -> f32 -// CHECK: "llvm.intr.powi"(%[[FLOAT]], %[[I32]]) : (f32, i32) -> f32 - %a31 = "llvm.intr.powi"(%arg1, %arg0) : (f32, i32) -> f32 +// CHECK: llvm.intr.powi(%[[FLOAT]], %[[I32]]) : (f32, i32) -> f32 + %a31 = llvm.intr.powi(%arg1, %arg0) : (f32, i32) -> f32 -// CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (i32) -> i32 - %32 = "llvm.intr.bitreverse"(%arg0) : (i32) -> i32 +// CHECK: llvm.intr.bitreverse(%{{.*}}) : (i32) -> i32 + %32 = llvm.intr.bitreverse(%arg0) : (i32) -> i32 -// CHECK: "llvm.intr.ctpop"(%{{.*}}) : (i32) -> i32 - %33 = "llvm.intr.ctpop"(%arg0) : (i32) -> i32 +// CHECK: llvm.intr.ctpop(%{{.*}}) : (i32) -> i32 + %33 
= llvm.intr.ctpop(%arg0) : (i32) -> i32 -// CHECK: "llvm.intr.round"(%[[FLOAT]]) : (f32) -> f32 - %34 = "llvm.intr.round"(%arg1) : (f32) -> f32 +// CHECK: llvm.intr.round(%[[FLOAT]]) : (f32) -> f32 + %34 = llvm.intr.round(%arg1) : (f32) -> f32 // CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () @@ -483,6 +483,11 @@ func.func @fastmathFlags(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: vector<2 x f // CHECK: {{.*}} = llvm.fneg %arg0 : f32 %10 = llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 + +// CHECK: {{.*}} = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %11 = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 +// CHECK: {{.*}} = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %12 = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 return } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index fad78b2b0ca96..ba276047a742c 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -796,12 +796,12 @@ func.func @omp_atomic_update(%x : memref, %expr : i32, %xBool : memref, } // CHECK: omp.atomic.update %[[X]] : memref // CHECK-NEXT: (%[[XVAL:.*]]: i32): - // CHECK-NEXT: %[[NEWVAL:.*]] = "llvm.intr.smax"(%[[XVAL]], %[[EXPR]]) : (i32, i32) -> i32 + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.intr.smax(%[[XVAL]], %[[EXPR]]) : (i32, i32) -> i32 // CHECK-NEXT: omp.yield(%[[NEWVAL]] : i32) // CHECK-NEXT: } omp.atomic.update %x : memref { ^bb0(%xval: i32): - %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32 + %newval = llvm.intr.smax(%xval, %expr) : (i32, i32) -> i32 omp.yield(%newval : i32) } diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index d8fca91860053..b8ea328eec6df 100644 --- 
a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -6,13 +6,13 @@ define void @intrinsics() { ; CHECK-LABEL: llvm.func @fmuladd_test define void @fmuladd_test(float %0, float %1, <8 x float> %2, i8* %3) { - ; CHECK: "llvm.intr.fmuladd"(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 + ; CHECK: llvm.intr.fmuladd(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 %5 = call float @llvm.fmuladd.f32(float %0, float %1, float %0) - ; CHECK: "llvm.intr.fmuladd"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.fmuladd(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) - ; CHECK: "llvm.intr.fma"(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 + ; CHECK: llvm.intr.fma(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 %7 = call float @llvm.fma.f32(float %0, float %1, float %0) - ; CHECK: "llvm.intr.fma"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.fma(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %8 = call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) ; CHECK: "llvm.intr.prefetch"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i32, i32, i32) -> () call void @llvm.prefetch.p0i8(i8* %3, i32 0, i32 3, i32 1) @@ -21,111 +21,111 @@ define void @fmuladd_test(float %0, float %1, <8 x float> %2, i8* %3) { ; CHECK-LABEL: llvm.func @exp_test define void @exp_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.exp"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.exp.f32(float %0) - ; CHECK: "llvm.intr.exp"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.exp(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> 
@llvm.exp.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @exp2_test define void @exp2_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.exp2"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.exp2(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.exp2.f32(float %0) - ; CHECK: "llvm.intr.exp2"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.exp2(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.exp2.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @log_test define void @log_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.log"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.log.f32(float %0) - ; CHECK: "llvm.intr.log"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.log(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.log.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @log10_test define void @log10_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.log10"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.log10(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.log10.f32(float %0) - ; CHECK: "llvm.intr.log10"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.log10(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.log10.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @log2_test define void @log2_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.log2"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.log2(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.log2.f32(float %0) - ; CHECK: "llvm.intr.log2"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.log2(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.log2.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @fabs_test define void @fabs_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.fabs"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32 %3 = call float 
@llvm.fabs.f32(float %0) - ; CHECK: "llvm.intr.fabs"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.fabs(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @sqrt_test define void @sqrt_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.sqrt.f32(float %0) - ; CHECK: "llvm.intr.sqrt"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.sqrt(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @ceil_test define void @ceil_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.ceil"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.ceil(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.ceil.f32(float %0) - ; CHECK: "llvm.intr.ceil"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.ceil(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @floor_test define void @floor_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.floor"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.floor(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.floor.f32(float %0) - ; CHECK: "llvm.intr.floor"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.floor(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @cos_test define void @cos_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.cos(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.cos.f32(float %0) - ; CHECK: "llvm.intr.cos"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.cos(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.cos.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: 
llvm.func @copysign_test define void @copysign_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.copysign"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.copysign(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.copysign.f32(float %0, float %1) - ; CHECK: "llvm.intr.copysign"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.copysign(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.copysign.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @pow_test define void @pow_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.pow"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.pow(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.pow.f32(float %0, float %1) - ; CHECK: "llvm.intr.pow"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.pow(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.pow.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @bitreverse_test define void @bitreverse_test(i32 %0, <8 x i32> %1) { - ; CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (i32) -> i32 + ; CHECK: llvm.intr.bitreverse(%{{.*}}) : (i32) -> i32 %3 = call i32 @llvm.bitreverse.i32(i32 %0) - ; CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.bitreverse(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> %4 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %1) ret void } @@ -152,81 +152,81 @@ define void @cttz_test(i32 %0, <8 x i32> %1) { ; CHECK-LABEL: llvm.func @ctpop_test define void @ctpop_test(i32 %0, <8 x i32> %1) { - ; CHECK: "llvm.intr.ctpop"(%{{.*}}) : (i32) -> i32 + ; CHECK: llvm.intr.ctpop(%{{.*}}) : (i32) -> i32 %3 = call i32 @llvm.ctpop.i32(i32 %0) - ; CHECK: "llvm.intr.ctpop"(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> + ; CHECK: 
llvm.intr.ctpop(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> %4 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %1) ret void } ; CHECK-LABEL: llvm.func @maximum_test define void @maximum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.maximum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.maximum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.maximum.f32(float %0, float %1) - ; CHECK: "llvm.intr.maximum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.maximum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.maximum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @minimum_test define void @minimum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.minimum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.minimum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.minimum.f32(float %0, float %1) - ; CHECK: "llvm.intr.minimum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.minimum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.minimum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @maxnum_test define void @maxnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.maxnum.f32(float %0, float %1) - ; CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @minnum_test define void @minnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) 
{ - ; CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.minnum.f32(float %0, float %1) - ; CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.minnum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @smax_test define void @smax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.smax.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } ; CHECK-LABEL: llvm.func @smin_test define void @smin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.smin.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } ; CHECK-LABEL: llvm.func @umax_test define void @umax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: "llvm.intr.umax"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.umax(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.umax.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.umax"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.umax(%{{.*}}, %{{.*}}) : (vector<8xi32>, 
vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } ; CHECK-LABEL: llvm.func @umin_test define void @umin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: "llvm.intr.umin"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.umin(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.umin.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.umin"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.umin(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index bc9c287fa9a81..54eb0f5e04c2c 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1680,6 +1680,17 @@ llvm.func @fastmathFlags(%arg0: f32) { %14 = llvm.call @fastmathFlagsFunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> (f32) %15 = llvm.call @fastmathFlagsFunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> (f32) %16 = llvm.call @fastmathFlagsFunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> (f32) + +// CHECK: call fast float @llvm.copysign.f32(float {{.*}}, float {{.*}}) + %17 = "llvm.intr.copysign"(%arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 +// CHECK: call afn float @llvm.copysign.f32(float {{.*}}, float {{.*}}) + %18 = "llvm.intr.copysign"(%arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + +// CHECK: call fast float @llvm.powi.f32.i32(float {{.*}}, i32 {{.*}}) + %exp = llvm.mlir.constant(1 : i32) : i32 + %19 = "llvm.intr.powi"(%arg0, %exp) {fastmathFlags = #llvm.fastmath} : (f32, i32) -> f32 +// CHECK: call afn float @llvm.powi.f32.i32(float {{.*}}, i32 {{.*}}) + %20 = "llvm.intr.powi"(%arg0, %exp) {fastmathFlags = #llvm.fastmath} : (f32, i32) -> f32 llvm.return } From 2f323e04972c98d47af9ca5e5b17b64d8ce6a5d6 Mon Sep 17 00:00:00 
2001 From: Valentin Clement Date: Wed, 2 Nov 2022 20:46:09 +0100 Subject: [PATCH 070/516] [flang] Fix for polymoprhic pointer component Fix path that generates MutableBox for pointer component. Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D137270 --- flang/lib/Optimizer/Builder/FIRBuilder.cpp | 2 +- flang/test/Lower/allocatable-polymorphic.f90 | 27 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 521f469b4ea2f..984c2459cac66 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -920,7 +920,7 @@ fir::ExtendedValue fir::factory::componentToExtendedValue( auto fieldTy = component.getType(); if (auto ty = fir::dyn_cast_ptrEleTy(fieldTy)) fieldTy = ty; - if (fieldTy.isa()) { + if (fieldTy.isa()) { llvm::SmallVector nonDeferredTypeParams; auto eleTy = fir::unwrapSequenceType(fir::dyn_cast_ptrOrBoxEleTy(fieldTy)); if (auto charTy = eleTy.dyn_cast()) { diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index 6a0fa45234fde..c82df40045fc2 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -17,6 +17,10 @@ module poly procedure :: proc2 => proc2_p2 end type + type with_alloc + class(p1), pointer :: element + end type + contains subroutine proc1_p1() print*, 'call proc1_p1' @@ -348,8 +352,31 @@ subroutine test_deallocate() allocate(p) deallocate(p) end subroutine + + subroutine test_type_with_polymorphic_pointer_component() + type(with_alloc), pointer :: a + allocate(a) + allocate(a%element) + end subroutine end module +! CHECK-LABEL: func.func @_QMpolyPtest_type_with_polymorphic_pointer_component() +! CHECK: %[[TYPE_PTR:.*]] = fir.alloca !fir.ptr>>}>> {uniq_name = "_QMpolyFtest_type_with_polymorphic_pointer_componentEa.addr"} +! 
CHECK: %[[TYPE_PTR_LOAD:.*]] = fir.load %[[TYPE_PTR]] : !fir.ref>>}>>> +! CHECK: %[[ELEMENT:.*]] = fir.field_index element, !fir.type<_QMpolyTwith_alloc{element:!fir.class>>}> +! CHECK: %[[ELEMENT_DESC:.*]] = fir.coordinate_of %[[TYPE_PTR_LOAD]], %[[ELEMENT]] : (!fir.ptr>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[ZERO:.*]] = fir.zero_bits !fir.ptr> +! CHECK: %[[ZERO_DESC:.*]] = fir.embox %[[ZERO]] : (!fir.ptr>) -> !fir.class>> +! CHECK: fir.store %[[ZERO_DESC]] to %[[ELEMENT_DESC]] : !fir.ref>>> +! CHECK: %[[TYPE_DESC_P1:.*]] = fir.address_of(@_QMpolyE.dt.p1) : !fir.ref> +! CHECK: %[[ELEMENT_DESC_CAST:.*]] = fir.convert %[[ELEMENT_DESC]] : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[TYPE_DESC_P1_CAST:.*]] = fir.convert %[[TYPE_DESC_P1]] : (!fir.ref>) -> !fir.ref +! CHECK: %[[RANK:.*]] = arith.constant 0 : i32 +! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32 +! CHECK: %{{.*}} = fir.call @_FortranAPointerNullifyDerived(%[[ELEMENT_DESC_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[CORANK]]) : (!fir.ref>, !fir.ref, i32, i32) -> none +! CHECK: %[[ELEMENT_DESC_CAST:.*]] = fir.convert %[[ELEMENT_DESC]] : (!fir.ref>>>) -> !fir.ref> +! 
CHECK: %{{.*}} = fir.call @_FortranAPointerAllocate(%[[ELEMENT_DESC_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 + program test_alloc use poly From 31b9dad085369662a037b0e561ae3ac59b54bbbf Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 2 Nov 2022 20:47:19 +0100 Subject: [PATCH 071/516] [flang][NFC] Fix typo in filename --- .../Lower/{nullify-polymoprhic.f90 => nullify-polymorphic.f90} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename flang/test/Lower/{nullify-polymoprhic.f90 => nullify-polymorphic.f90} (100%) diff --git a/flang/test/Lower/nullify-polymoprhic.f90 b/flang/test/Lower/nullify-polymorphic.f90 similarity index 100% rename from flang/test/Lower/nullify-polymoprhic.f90 rename to flang/test/Lower/nullify-polymorphic.f90 From 0807bc7e07f0430bd5b048d5c08f09442aab3b7d Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 18 Oct 2022 11:10:19 -0700 Subject: [PATCH 072/516] [wasm-ld] Update supported features in the generic CPU configuration Accompanying https://reviews.llvm.org/D125728, this updates LLVM Codegen's "generic" CPU to enable the same new features. 
Differential Revision: https://reviews.llvm.org/D125729 --- lld/test/wasm/function-imports-first.ll | 2 +- lld/test/wasm/gc-sections.ll | 2 +- lld/test/wasm/init-fini.ll | 4 +-- lld/test/wasm/local-symbols.ll | 2 +- lld/test/wasm/locals-duplicate.test | 4 +-- lld/test/wasm/signature-mismatch-export.ll | 2 +- lld/test/wasm/weak-alias-overide.ll | 4 +-- lld/test/wasm/weak-alias.ll | 4 +-- llvm/lib/Target/WebAssembly/WebAssembly.td | 8 ++++- llvm/test/CodeGen/WebAssembly/PR41149.ll | 5 ++- llvm/test/CodeGen/WebAssembly/bulk-memory.ll | 4 +-- .../test/CodeGen/WebAssembly/bulk-memory64.ll | 4 +-- llvm/test/CodeGen/WebAssembly/byval.ll | 4 +-- llvm/test/CodeGen/WebAssembly/conv-trap.ll | 2 +- .../CodeGen/WebAssembly/fast-isel-noreg.ll | 4 +-- llvm/test/CodeGen/WebAssembly/global.ll | 4 +-- llvm/test/CodeGen/WebAssembly/legalize.ll | 2 +- .../CodeGen/WebAssembly/mem-intrinsics.ll | 2 +- .../CodeGen/WebAssembly/memory64-feature.ll | 2 +- llvm/test/CodeGen/WebAssembly/multivalue.ll | 8 ++--- .../CodeGen/WebAssembly/mutable-globals.ll | 2 +- .../CodeGen/WebAssembly/reference-types.ll | 2 +- .../CodeGen/WebAssembly/signext-zeroext.ll | 2 +- .../CodeGen/WebAssembly/simd-conversions.ll | 2 +- .../CodeGen/WebAssembly/simd-sext-inreg.ll | 2 +- llvm/test/CodeGen/WebAssembly/tailcall.ll | 4 +-- .../WebAssembly/target-features-tls.ll | 4 +-- .../CodeGen/WebAssembly/target-features.ll | 31 ++++++++++--------- llvm/test/MC/WebAssembly/array-fill.ll | 2 +- llvm/test/MC/WebAssembly/assembler-binary.ll | 4 +-- llvm/test/MC/WebAssembly/bss.ll | 2 +- llvm/test/MC/WebAssembly/comdat.ll | 6 ++-- llvm/test/MC/WebAssembly/debug-info.ll | 4 +-- llvm/test/MC/WebAssembly/debug-info64.ll | 4 +-- llvm/test/MC/WebAssembly/explicit-sections.ll | 2 +- llvm/test/MC/WebAssembly/global-ctor-dtor.ll | 2 +- llvm/test/MC/WebAssembly/unnamed-data.ll | 2 +- llvm/test/MC/WebAssembly/visibility.ll | 2 +- 38 files changed, 80 insertions(+), 72 deletions(-) diff --git a/lld/test/wasm/function-imports-first.ll 
b/lld/test/wasm/function-imports-first.ll index 7552fb428b0ba..b4e984092f82d 100644 --- a/lld/test/wasm/function-imports-first.ll +++ b/lld/test/wasm/function-imports-first.ll @@ -1,5 +1,5 @@ ; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ret32.s -o %t.ret32.o -; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.o ; RUN: wasm-ld -o %t.wasm %t.o %t.ret32.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/gc-sections.ll b/lld/test/wasm/gc-sections.ll index de8298697bf12..762933d0ef44b 100644 --- a/lld/test/wasm/gc-sections.ll +++ b/lld/test/wasm/gc-sections.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.o ; RUN: yaml2obj %S/Inputs/globals.yaml -o %t_globals.o ; RUN: wasm-ld -print-gc-sections -o %t1.wasm %t.o %t_globals.o | \ ; RUN: FileCheck %s -check-prefix=PRINT-GC diff --git a/lld/test/wasm/init-fini.ll b/lld/test/wasm/init-fini.ll index 5631d58d68e5c..15154f36d3d3b 100644 --- a/lld/test/wasm/init-fini.ll +++ b/lld/test/wasm/init-fini.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj -o %t.o %s -; RUN: llc -filetype=obj %S/Inputs/global-ctor-dtor.ll -o %t.global-ctor-dtor.o +; RUN: llc -mcpu=mvp -filetype=obj -o %t.o %s +; RUN: llc -mcpu=mvp -filetype=obj %S/Inputs/global-ctor-dtor.ll -o %t.global-ctor-dtor.o target triple = "wasm32-unknown-unknown" diff --git a/lld/test/wasm/local-symbols.ll b/lld/test/wasm/local-symbols.ll index f504603045858..93ed3c9d4eee7 100644 --- a/lld/test/wasm/local-symbols.ll +++ b/lld/test/wasm/local-symbols.ll @@ -1,5 +1,5 @@ ; Test that internal symbols can still be GC'd when with --export-dynamic. 
-; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.o ; RUN: wasm-ld --export-dynamic -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/locals-duplicate.test b/lld/test/wasm/locals-duplicate.test index 7de8ef15b1840..5c3135a424e69 100644 --- a/lld/test/wasm/locals-duplicate.test +++ b/lld/test/wasm/locals-duplicate.test @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj %p/Inputs/locals-duplicate1.ll -o %t1.o -; RUN: llc -filetype=obj %p/Inputs/locals-duplicate2.ll -o %t2.o +; RUN: llc -mcpu=mvp -filetype=obj %p/Inputs/locals-duplicate1.ll -o %t1.o +; RUN: llc -mcpu=mvp -filetype=obj %p/Inputs/locals-duplicate2.ll -o %t2.o ; RUN: wasm-ld --export-dynamic --no-entry -o %t.wasm %t1.o %t2.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/signature-mismatch-export.ll b/lld/test/wasm/signature-mismatch-export.ll index 1d5e2a77a6c49..b77b5092a092a 100644 --- a/lld/test/wasm/signature-mismatch-export.ll +++ b/lld/test/wasm/signature-mismatch-export.ll @@ -1,5 +1,5 @@ ; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ret32.s -o %t.ret32.o -; RUN: llc -filetype=obj %s -o %t.main.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.main.o ; RUN: wasm-ld --export=ret32 -o %t.wasm %t.main.o %t.ret32.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/weak-alias-overide.ll b/lld/test/wasm/weak-alias-overide.ll index fcf2293892910..ca6f4bf4230a2 100644 --- a/lld/test/wasm/weak-alias-overide.ll +++ b/lld/test/wasm/weak-alias-overide.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj -o %t.o %s -; RUN: llc -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o +; RUN: llc -mcpu=mvp -filetype=obj -o %t.o %s +; RUN: llc -mcpu=mvp -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o ; RUN: wasm-ld --export-dynamic %t.o %t2.o -o %t.wasm ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/weak-alias.ll b/lld/test/wasm/weak-alias.ll index aa0a271396d1a..cba39acda8e9c 100644 --- 
a/lld/test/wasm/weak-alias.ll +++ b/lld/test/wasm/weak-alias.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj -o %t.o %s -; RUN: llc -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o +; RUN: llc -mcpu=mvp -filetype=obj -o %t.o %s +; RUN: llc -mcpu=mvp -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o ; RUN: wasm-ld --export-dynamic %t.o %t2.o -o %t.wasm ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index b83dcf3a8e656..7531d36a74a64 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -97,7 +97,13 @@ def WebAssemblyInstrInfo : InstrInfo; def : ProcessorModel<"mvp", NoSchedModel, []>; // Generic processor: latest stable version. -def : ProcessorModel<"generic", NoSchedModel, []>; +// +// This includes features that have achieved phase 4 of the standards process, +// and that are expected to work for most users in the current time, with +// consideration given to available support in relevant engines and tools, and +// the importance of the features. +def : ProcessorModel<"generic", NoSchedModel, + [FeatureSignExt, FeatureMutableGlobals]>; // Latest and greatest experimental version of WebAssembly. Bugs included! 
def : ProcessorModel<"bleeding-edge", NoSchedModel, diff --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll index 428f84979d89e..d18bd9c4a3b88 100644 --- a/llvm/test/CodeGen/WebAssembly/PR41149.ll +++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll @@ -13,9 +13,8 @@ define void @mod() { ; CHECK-NEXT: i32.load8_u 0 ; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 24 -; CHECK-NEXT: i32.shl -; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32.extend8_s +; CHECK-NEXT: i32.const 7 ; CHECK-NEXT: i32.shr_s ; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.xor diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll index f739f08f70715..4ccc95c8f4928 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM ; Test that basic bulk memory codegen works correctly diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll index eaf9a9659429e..88cf6b58c0732 100644 --- 
a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM ; Test that basic bulk memory codegen works correctly diff --git a/llvm/test/CodeGen/WebAssembly/byval.ll b/llvm/test/CodeGen/WebAssembly/byval.ll index 5f0a71960b677..5a42f3b9438a0 100644 --- a/llvm/test/CodeGen/WebAssembly/byval.ll +++ b/llvm/test/CodeGen/WebAssembly/byval.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -fast-isel | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -mcpu=mvp | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -mcpu=mvp -fast-isel | FileCheck %s target triple = 
"wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/conv-trap.ll b/llvm/test/CodeGen/WebAssembly/conv-trap.ll index 0906743374b93..4402880a8c383 100644 --- a/llvm/test/CodeGen/WebAssembly/conv-trap.ll +++ b/llvm/test/CodeGen/WebAssembly/conv-trap.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-nontrapping-fptoint | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-nontrapping-fptoint | FileCheck %s ; Test that basic conversion operations assemble as expected using ; the trapping opcodes and explicit code to suppress the trapping. diff --git a/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll b/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll index 1bc87d7c82eed..919ac6815717d 100644 --- a/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll +++ b/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -asm-verbose=false -wasm-keep-registers -fast-isel -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-keep-registers -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -wasm-keep-registers -fast-isel -verify-machineinstrs | FileCheck %s ; Test that FastISel does not generate instructions with NoReg diff --git a/llvm/test/CodeGen/WebAssembly/global.ll b/llvm/test/CodeGen/WebAssembly/global.ll index c3f6b2e23e4a4..dc9b909dc7ea4 100644 --- a/llvm/test/CodeGen/WebAssembly/global.ll +++ b/llvm/test/CodeGen/WebAssembly/global.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-atomics | FileCheck %s -; 
RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-atomics | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+atomics | FileCheck %s ; Test that globals assemble as expected. diff --git a/llvm/test/CodeGen/WebAssembly/legalize.ll b/llvm/test/CodeGen/WebAssembly/legalize.ll index 686bd23c9ff54..cd7d719bff11a 100644 --- a/llvm/test/CodeGen/WebAssembly/legalize.ll +++ b/llvm/test/CodeGen/WebAssembly/legalize.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test various types and operators that need to be legalized. diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll index 8491e246c7f66..d9ceb86f208d8 100644 --- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %s ; Test memcpy, memmove, and memset intrinsics. 
diff --git a/llvm/test/CodeGen/WebAssembly/memory64-feature.ll b/llvm/test/CodeGen/WebAssembly/memory64-feature.ll index 53fccfe088883..bd277dfdc37d3 100644 --- a/llvm/test/CodeGen/WebAssembly/memory64-feature.ll +++ b/llvm/test/CodeGen/WebAssembly/memory64-feature.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -mcpu=mvp < %s | FileCheck %s ; Test that wasm64 is properly emitted into the target features section diff --git a/llvm/test/CodeGen/WebAssembly/multivalue.ll b/llvm/test/CodeGen/WebAssembly/multivalue.ll index a0f36ea8265e0..0080052e8f7a0 100644 --- a/llvm/test/CodeGen/WebAssembly/multivalue.ll +++ b/llvm/test/CodeGen/WebAssembly/multivalue.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mattr=+multivalue,+tail-call | FileCheck %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mattr=+reference-types,+multivalue,+tail-call | FileCheck --check-prefix REF %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+multivalue,+tail-call | FileCheck %s --check-prefix REGS -; RUN: llc < %s --filetype=obj -mattr=+multivalue,+tail-call | obj2yaml | FileCheck %s --check-prefix OBJ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mcpu=mvp -mattr=+multivalue,+tail-call | FileCheck %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mcpu=mvp -mattr=+reference-types,+multivalue,+tail-call | FileCheck --check-prefix REF %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+multivalue,+tail-call | FileCheck %s --check-prefix REGS +; RUN: llc < %s --filetype=obj -mcpu=mvp -mattr=+multivalue,+tail-call | obj2yaml | FileCheck %s --check-prefix OBJ ; Test that the multivalue calls, returns, function types, and block ; types work as expected. 
diff --git a/llvm/test/CodeGen/WebAssembly/mutable-globals.ll b/llvm/test/CodeGen/WebAssembly/mutable-globals.ll index df698c1a11b82..93962f7e6d92c 100644 --- a/llvm/test/CodeGen/WebAssembly/mutable-globals.ll +++ b/llvm/test/CodeGen/WebAssembly/mutable-globals.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+mutable-globals | FileCheck %s +; RUN: llc < %s -mcpu=mvp -mattr=+mutable-globals | FileCheck %s ; Test that mutable globals is properly emitted into the target features section diff --git a/llvm/test/CodeGen/WebAssembly/reference-types.ll b/llvm/test/CodeGen/WebAssembly/reference-types.ll index d56541db8572e..168aaec8f0943 100644 --- a/llvm/test/CodeGen/WebAssembly/reference-types.ll +++ b/llvm/test/CodeGen/WebAssembly/reference-types.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+reference-types | FileCheck %s +; RUN: llc < %s -mcpu=mvp -mattr=+reference-types | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll b/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll index f5dbfeb78c1db..1dafbe58a7f8c 100644 --- a/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll +++ b/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -mcpu=mvp -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test zeroext and signext ABI keywords diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll index f945b8e061717..8459ec8101ff2 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s +; RUN: llc < %s 
-verify-machineinstrs -mcpu=mvp -mattr=+simd128 | FileCheck %s ; Test that vector float-to-int and int-to-float instructions lower correctly diff --git a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll index c4b94381e39a6..45080d14dfd29 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mcpu=mvp -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128 ; Test that vector sign extensions lower to shifts diff --git a/llvm/test/CodeGen/WebAssembly/tailcall.ll b/llvm/test/CodeGen/WebAssembly/tailcall.ll index d0f39e6416ba8..3d96c666ddc58 100644 --- a/llvm/test/CodeGen/WebAssembly/tailcall.ll +++ b/llvm/test/CodeGen/WebAssembly/tailcall.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s +; RUN: llc 
< %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s ; RUN: llc < %s --filetype=obj -mattr=+tail-call | obj2yaml | FileCheck --check-prefix=YAML %s ; Test that the tail calls lower correctly diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index 57d14053f3342..45bc06b5d5c96 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes NO-BULK-MEM -; RUN: llc < %s -mattr=+bulk-memory,atomics | FileCheck %s --check-prefixes BULK-MEM +; RUN: llc < %s -mcpu=mvp -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes NO-BULK-MEM +; RUN: llc < %s -mcpu=mvp -mattr=+bulk-memory,atomics | FileCheck %s --check-prefixes BULK-MEM ; Test that the target features section contains -atomics or +atomics ; for modules that have thread local storage in their source. 
diff --git a/llvm/test/CodeGen/WebAssembly/target-features.ll b/llvm/test/CodeGen/WebAssembly/target-features.ll index ecb49766659b0..4debf66fe0f7a 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s | FileCheck %s --check-prefixes CHECK,ATTRS -; RUN: llc < %s -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 +; RUN: llc < %s -mcpu=mvp -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 ; RUN: llc < %s -mcpu=bleeding-edge | FileCheck %s --check-prefixes CHECK,BLEEDING-EDGE ; Test that codegen emits target features from the command line or @@ -55,17 +55,22 @@ attributes #2 = { "target-features"="+reference-types" } ; CHECK-LABEL: .custom_section.target_features,"",@ -; +atomics, +nontrapping-fptoint, +reference-types -; ATTRS-NEXT: .int8 3 -; ATTRS-NEXT: .int8 43 -; ATTRS-NEXT: .int8 7 -; ATTRS-NEXT: .ascii "atomics" -; ATTRS-NEXT: .int8 43 -; ATTRS-NEXT: .int8 19 -; ATTRS-NEXT: .ascii "nontrapping-fptoint" -; ATTRS-NEXT: .int8 43 -; ATTRS-NEXT: .int8 15 -; ATTRS-NEXT: .ascii "reference-types" +; +atomics, +reference-types, +mutable-globals +; ATTRS-NEXT: .int8 5 +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 7 +; ATTRS-NEXT: .ascii "atomics" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 15 +; ATTRS-NEXT: .ascii "mutable-globals" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 19 +; ATTRS-NEXT: .ascii "nontrapping-fptoint" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 15 +; ATTRS-NEXT: .ascii "reference-types" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 8 ; +atomics, +nontrapping-fptoint, +reference-types, +simd128 ; SIMD128-NEXT: .int8 4 @@ -109,5 +114,3 @@ attributes #2 = { "target-features"="+reference-types" } ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 9 ; BLEEDING-EDGE-NEXT: .ascii "tail-call" - -; CHECK-NEXT: .text diff --git a/llvm/test/MC/WebAssembly/array-fill.ll b/llvm/test/MC/WebAssembly/array-fill.ll index 
2f4bf83205a4d..4725d4eda065b 100644 --- a/llvm/test/MC/WebAssembly/array-fill.ll +++ b/llvm/test/MC/WebAssembly/array-fill.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s ; PR33624 source_filename = "ws.c" diff --git a/llvm/test/MC/WebAssembly/assembler-binary.ll b/llvm/test/MC/WebAssembly/assembler-binary.ll index c3d6bd588d24a..815d347047974 100644 --- a/llvm/test/MC/WebAssembly/assembler-binary.ll +++ b/llvm/test/MC/WebAssembly/assembler-binary.ll @@ -1,7 +1,7 @@ -; RUN: llc -filetype=asm -asm-verbose=false %s -o %t.s +; RUN: llc -mcpu=mvp -filetype=asm -asm-verbose=false %s -o %t.s ; RUN: FileCheck -check-prefix=ASM -input-file %t.s %s ; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=asm %t.s -o - | FileCheck -check-prefix=ASM %s -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s ; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj %t.s -o - | obj2yaml | FileCheck %s ; This specifically tests that we can generate a binary from the assembler diff --git a/llvm/test/MC/WebAssembly/bss.ll b/llvm/test/MC/WebAssembly/bss.ll index 05b6a6986e119..d1d9f5622def7 100644 --- a/llvm/test/MC/WebAssembly/bss.ll +++ b/llvm/test/MC/WebAssembly/bss.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/comdat.ll b/llvm/test/MC/WebAssembly/comdat.ll index 6f8c1403be749..0886301597471 100644 --- a/llvm/test/MC/WebAssembly/comdat.ll +++ b/llvm/test/MC/WebAssembly/comdat.ll @@ -1,6 +1,6 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s -; RUN: llc -filetype=asm %s -asm-verbose=false -o - | FileCheck --check-prefix=ASM %s -; RUN: llc -filetype=asm %s -o - | llvm-mc -triple=wasm32 -filetype=obj -o - | obj2yaml | 
FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=asm %s -asm-verbose=false -o - | FileCheck --check-prefix=ASM %s +; RUN: llc -mcpu=mvp -filetype=asm %s -o - | llvm-mc -triple=wasm32 -filetype=obj -o - | obj2yaml | FileCheck %s ; These RUN lines verify the ll direct-to-object path, the ll->asm path, and the ; object output via asm. diff --git a/llvm/test/MC/WebAssembly/debug-info.ll b/llvm/test/MC/WebAssembly/debug-info.ll index d2a815f097164..a60557b65e3ba 100644 --- a/llvm/test/MC/WebAssembly/debug-info.ll +++ b/llvm/test/MC/WebAssembly/debug-info.ll @@ -1,4 +1,4 @@ -; RUN: llc -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s +; RUN: llc -mcpu=mvp -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s ; CHECK: Format: WASM ; CHECK-NEXT:Arch: wasm32 @@ -279,7 +279,7 @@ entry: ret void, !dbg !18 } -attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="mvp" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!13, !14, !15} diff --git a/llvm/test/MC/WebAssembly/debug-info64.ll b/llvm/test/MC/WebAssembly/debug-info64.ll index 47b33aac104c6..da204b9753a50 100644 
--- a/llvm/test/MC/WebAssembly/debug-info64.ll +++ b/llvm/test/MC/WebAssembly/debug-info64.ll @@ -1,4 +1,4 @@ -; RUN: llc -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s +; RUN: llc -mcpu=mvp -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s ; CHECK: Format: WASM ; CHECK-NEXT: Arch: wasm64 @@ -285,7 +285,7 @@ entry: ret void, !dbg !18 } -attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="mvp" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!13, !14, !15} diff --git a/llvm/test/MC/WebAssembly/explicit-sections.ll b/llvm/test/MC/WebAssembly/explicit-sections.ll index ae04051f458e0..a65172b22d467 100644 --- a/llvm/test/MC/WebAssembly/explicit-sections.ll +++ b/llvm/test/MC/WebAssembly/explicit-sections.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/global-ctor-dtor.ll b/llvm/test/MC/WebAssembly/global-ctor-dtor.ll index df1252ef9ce15..97b40e8a5d4bb 100644 --- a/llvm/test/MC/WebAssembly/global-ctor-dtor.ll +++ 
b/llvm/test/MC/WebAssembly/global-ctor-dtor.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/unnamed-data.ll b/llvm/test/MC/WebAssembly/unnamed-data.ll index 1fe6df2f77afc..398d53c826374 100644 --- a/llvm/test/MC/WebAssembly/unnamed-data.ll +++ b/llvm/test/MC/WebAssembly/unnamed-data.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/visibility.ll b/llvm/test/MC/WebAssembly/visibility.ll index 5bb757b28f18a..69b273ecbf25e 100644 --- a/llvm/test/MC/WebAssembly/visibility.ll +++ b/llvm/test/MC/WebAssembly/visibility.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" From 86f9655373a540bd092804beaaf1962f84a38fa7 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 2 Nov 2022 13:00:35 -0700 Subject: [PATCH 073/516] [LV][RISCV] Add test showing poor choice of VF for short loop --- .../LoopVectorize/RISCV/short-trip-count.ll | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll new file mode 100644 index 0000000000000..4ba2ae23daeaf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=riscv64 -mattr=+v -loop-vectorize < %s | FileCheck %s + +; FIXME: Using a <4 x i32> would be strictly better than tail folded +; scalable 
vectorization in this case. +define void @small_trip_count(i32* nocapture %a) nounwind vscale_range(4,1024) { +; CHECK-LABEL: @small_trip_count( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 -5, [[TMP1]] +; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP7]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP8]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to * +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i32.p0nxv2i32(* [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP10]] to * +; CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0nxv2i32( [[TMP12]], * [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 2 +; 
CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], 3 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %gep = getelementptr inbounds i32, i32* %a, i32 %iv + %v = load i32, i32* %gep, align 4 + %add = add nsw i32 %v, 1 + store i32 %add, i32* %gep, align 4 + %iv.next = add i32 %iv, 1 + %cond = icmp eq i32 %iv, 3 + br i1 %cond, label %exit, label %loop + +exit: + ret void +} From 6254495c6b4f35ad862c9a5fc28720664d28d816 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Nov 2022 09:18:41 -0700 Subject: [PATCH 074/516] [RISCV] Move RVVBitsPerBlock to TargetParser.h so we can use it in clang. 
NFC Differential Revision: https://reviews.llvm.org/D137266 --- clang/lib/Basic/Targets/RISCV.cpp | 3 ++- llvm/include/llvm/Support/TargetParser.h | 3 +++ llvm/lib/Target/RISCV/RISCVISelLowering.h | 6 +----- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 08da01602599f..36fa962749820 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -255,7 +255,8 @@ RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts) const { if (unsigned MinVLen = ISAInfo->getMinVLen()) { unsigned MaxVLen = ISAInfo->getMaxVLen(); // RISCV::RVVBitsPerBlock is 64. - return std::pair(MinVLen/64, MaxVLen/64); + return std::make_pair(MinVLen / llvm::RISCV::RVVBitsPerBlock, + MaxVLen / llvm::RISCV::RVVBitsPerBlock); } return None; diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index e9920a50bae8e..39222b02e21ad 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -157,6 +157,9 @@ IsaVersion getIsaVersion(StringRef GPU); namespace RISCV { +// We use 64 bits as the known part in the scalable vector types. 
+static constexpr unsigned RVVBitsPerBlock = 64; + enum CPUKind : unsigned { #define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) CK_##ENUM, #define TUNE_PROC(ENUM, NAME) CK_##ENUM, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 1b8a4c557afa2..4f07d9ece8f39 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Support/TargetParser.h" namespace llvm { class RISCVSubtarget; @@ -325,11 +326,6 @@ enum NodeType : unsigned { }; } // namespace RISCVISD -namespace RISCV { -// We use 64 bits as the known part in the scalable vector types. -static constexpr unsigned RVVBitsPerBlock = 64; -} // namespace RISCV - class RISCVTargetLowering : public TargetLowering { const RISCVSubtarget &Subtarget; From 9b3834ef67e3f320fe78dd63f6d5f1bafeafba78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 31 Oct 2022 12:18:21 +0200 Subject: [PATCH 075/516] [clang] Fix inline builtin functions of an __asm__ renamed function with symbol prefixes If a function is renamed with `__asm__`, the name provided is the exact symbol name, without any extra implicit symbol prefixes. If the target does use symbol prefixes, the IR level symbol gets an `\01` prefix to indicate that it's a literal symbol name to be taken as is. When a builtin function is specialized by providing an inline version of it, that inline function is named `<funcname>.inline`. When the base function has been renamed due to `__asm__`, the inline function ends up named `<asmname>.inline`. Up to this point, things did work as expected before. However, for targets with symbol prefixes, one codepath that produced the combined name `<asmname>.inline` used the mangled `asmname` with `\01` prefix, while others didn't. This patch fixes this. 
This fixes the combination of asm renamed builtin function, with inline override of the function, on any target with symbol prefixes (such as i386 windows and any Darwin target). Differential Revision: https://reviews.llvm.org/D137073 --- clang/lib/CodeGen/CGExpr.cpp | 3 +- clang/test/CodeGen/inline-builtin-asm-name.c | 32 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/inline-builtin-asm-name.c diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index af753767f0328..493b340ecdc52 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5014,8 +5014,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { std::string NoBuiltinFD = ("no-builtin-" + FD->getName()).str(); std::string NoBuiltins = "no-builtins"; - auto *A = FD->getAttr(); - StringRef Ident = A ? A->getLabel() : FD->getName(); + StringRef Ident = CGF.CGM.getMangledName(GD); std::string FDInlineName = (Ident + ".inline").str(); bool IsPredefinedLibFunction = diff --git a/clang/test/CodeGen/inline-builtin-asm-name.c b/clang/test/CodeGen/inline-builtin-asm-name.c new file mode 100644 index 0000000000000..969174f7ac0d2 --- /dev/null +++ b/clang/test/CodeGen/inline-builtin-asm-name.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -o - %s -disable-llvm-optzns | FileCheck %s + +// CHECK: call i32 @"\01_asm_func_name.inline" + +// CHECK: declare dso_local i32 @"\01_asm_func_name"(ptr noundef, i32 noundef, ptr noundef, ptr noundef) + +// CHECK: define internal i32 @"\01_asm_func_name.inline" + +// CHECK: call i32 @__mingw_vsnprintf + +// CHECK: declare dso_local i32 @__mingw_vsnprintf + +typedef unsigned int size_t; + +int __mingw_vsnprintf(char *_DstBuf, size_t _MaxCount, const char *_Format, __builtin_va_list _ArgList); + +// For the real use case, "_asm_func_name" is actually "___mingw_vsnprintf", but it's renamed in the testcase for disambiguation. 
+int vsnprintf(char *__stream, size_t __n, const char *__format, __builtin_va_list __local_argv) __asm__("_asm_func_name"); + +extern __inline__ __attribute__((__always_inline__, __gnu_inline__)) +int vsnprintf(char *__stream, size_t __n, const char *__format, __builtin_va_list __local_argv) +{ + return __mingw_vsnprintf(__stream, __n, __format, __local_argv); +} + +void call(const char* fmt, ...) { + char buf[200]; + __builtin_va_list ap; + __builtin_va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + __builtin_va_end(ap); +} From b9ee2acc9c5c129bce5503dd8d2ab4757e42c990 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 1 Nov 2022 18:16:47 +0000 Subject: [PATCH 076/516] [LinkerWrapper] report on missing libraries The linker wrapper does its own library searching for static archives that can contain device code. The device linking phases happen before the host linking phases so that we can generate the necessary registration code and link it in with the rest of the code. Previously, if a library containing needed device code was not found, the execution would continue silently until it failed with undefined symbols. This patch allows the linker wrapper to perform its own check beforehand to catch these errors. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D137180 --- clang/test/Driver/linker-wrapper-image.c | 2 +- clang/test/Driver/linker-wrapper.c | 10 ++++++++-- .../tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 11 +++++++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index a54ba4d19122f..abf41d386d2de 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -115,7 +115,7 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: -linker-path /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" // HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 51f3ea3bc5457..b2d73f3621087 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -115,8 +115,8 @@ // RUN: --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -linker-path \ -// RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP +// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: 
lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.out {{.*}}.o // HIP: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx90a -o {{.*}}.out {{.*}}.o @@ -134,6 +134,12 @@ // LINKER_ARGS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.out {{.*}}.o a // LINKER_ARGS: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o a b +// RUN: not clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -ldummy \ +// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ +// RUN: -o a.out 2>&1 | FileCheck %s --check-prefix=MISSING-LIBRARY + +// MISSING-LIBRARY: error: unable to find library -ldummy + /// Ensure that temp files aren't leftoever from static libraries. // RUN: clang-offload-packager -o %t-lib.out \ // RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 40825ac831a50..3ad22be755f3c 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1219,7 +1219,7 @@ Optional searchLibraryBaseName(StringRef Name, StringRef Root, ArrayRef SearchPaths) { for (StringRef Dir : SearchPaths) { if (Optional File = findFile(Dir, Root, "lib" + Name + ".so")) - return None; + return File; if (Optional File = findFile(Dir, Root, "lib" + Name + ".a")) return File; } @@ -1266,7 +1266,7 @@ Expected> getDeviceInput(const ArgList &Args) { return std::move(Err); } - // Try to extract input from input libraries. + // Try to extract input from input archive libraries. 
for (const opt::Arg *Arg : Args.filtered(OPT_library)) { if (auto Library = searchLibrary(Arg->getValue(), Root, LibraryPaths)) { ErrorOr> BufferOrErr = @@ -1274,8 +1274,15 @@ Expected> getDeviceInput(const ArgList &Args) { if (std::error_code EC = BufferOrErr.getError()) reportError(createFileError(*Library, EC)); + if (identify_magic((*BufferOrErr)->getBuffer()) != file_magic::archive) + continue; + if (Error Err = extractOffloadBinaries(**BufferOrErr, LazyInputFiles)) return std::move(Err); + } else { + reportError(createStringError(inconvertibleErrorCode(), + "unable to find library -l%s", + Arg->getValue())); } } From cccbd2a2b2c38f47e2168998ad859d2b1f4347b4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 2 Nov 2022 15:28:50 -0500 Subject: [PATCH 077/516] Revert "[Attributor][NFCI] Move MemIntrinsic handling into the initializer" This was causing failures when optimizing codes with complex numbers. Revert until a fix can be implemented. This reverts commit 7fdf3564c04075d3e6be2d9540e5a6f1e084be9f. --- .../Transforms/IPO/AttributorAttributes.cpp | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 0b446a4f74078..cbc1f8b77c02f 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1516,15 +1516,13 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { AAPointerInfoCallSiteArgument(const IRPosition &IRP, Attributor &A) : AAPointerInfoFloating(IRP, A) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AAPointerInfoFloating::initialize(A); - + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + using namespace AA::PointerInfo; // We handle memory intrinsics explicitly, at least the first (= // destination) and second (=source) arguments as we know how they are // accessed. if (auto *MI = dyn_cast_or_null(getCtxI())) { - // TODO: Simplify the length. ConstantInt *Length = dyn_cast(MI->getLength()); int64_t LengthVal = AA::OffsetAndSize::Unknown; if (Length) @@ -1541,22 +1539,16 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { } else { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic " << *MI << "\n"); - indicatePessimisticFixpoint(); + return indicatePessimisticFixpoint(); } - indicateOptimisticFixpoint(); - LLVM_DEBUG({ - dbgs() << "Accesses by bin after initialization:\n"; + dbgs() << "Accesses by bin after update:\n"; dumpState(dbgs()); }); - return; - } - } - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - using namespace AA::PointerInfo; + return Changed; + } // TODO: Once we have call site specific value information we can provide // call site specific liveness information and then it makes From 117d792f35e6f84f2f29183408284c7e1cc838e7 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Sun, 30 Oct 2022 16:45:00 -0700 Subject: [PATCH 078/516] [clang-format] Don't skip #else/#elif of #if 0 Fixes #58188. 
Differential Revision: https://reviews.llvm.org/D137052 --- clang/lib/Format/UnwrappedLineParser.cpp | 6 ++---- clang/unittests/Format/FormatTest.cpp | 27 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 0372b89397db9..77140831c2c06 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1144,12 +1144,10 @@ void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { ++PPBranchLevel; assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { - // If the first branch is unreachable, set the BranchIndex to 1. This way - // the next branch will be parsed if there is one. - PPLevelBranchIndex.push_back(Unreachable ? 1 : 0); + PPLevelBranchIndex.push_back(0); PPLevelBranchCount.push_back(0); } - PPChainBranchIndex.push(0); + PPChainBranchIndex.push(Unreachable ? 
-1 : 0); bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; conditionalCompilationCondition(Unreachable || Skip); } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 18e79125d3894..82e91b3222715 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6127,6 +6127,33 @@ TEST_F(FormatTest, LayoutStatementsAroundPreprocessorDirectives) { "#endif\n" " x;\n" "}"); + + verifyFormat("#if 0\n" + "#endif\n" + "#if X\n" + "int something_fairly_long; // Align here please\n" + "#endif // Should be aligned"); + + verifyFormat("#if 0\n" + "#endif\n" + "#if X\n" + "#else // Align\n" + ";\n" + "#endif // Align"); + + verifyFormat("void SomeFunction(int param1,\n" + " template <\n" + "#ifdef A\n" + "#if 0\n" + "#endif\n" + " MyType>\n" + "#else\n" + " Type1, Type2>\n" + "#endif\n" + " param2,\n" + " param3) {\n" + " f();\n" + "}"); } TEST_F(FormatTest, GraciouslyHandleIncorrectPreprocessorConditions) { From 96696b882bdbeb219fbdd42cad1c091fc86b83d7 Mon Sep 17 00:00:00 2001 From: Jonathan Peyton Date: Tue, 1 Nov 2022 12:29:17 -0500 Subject: [PATCH 079/516] [OpenMP][libomp] Fix disabled affinity Fix setting affinity type and topology method when affinity is disabled and fix places that were not taking into account that affinity can be explicitly disabled by putting proper KMP_AFFINITY_CAPABLE() check. 
Differential Revision: https://reviews.llvm.org/D137176 --- openmp/runtime/src/kmp.h | 2 ++ openmp/runtime/src/kmp_affinity.cpp | 12 ++++++++++-- openmp/runtime/src/kmp_settings.cpp | 9 +++++++++ openmp/runtime/test/affinity/disabled.c | 25 +++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 openmp/runtime/test/affinity/disabled.c diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 6f7da88f66ac1..8a2bcedb4c9aa 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3655,6 +3655,8 @@ static inline void __kmp_assign_root_init_mask() { } } static inline void __kmp_reset_root_init_mask(int gtid) { + if (!KMP_AFFINITY_CAPABLE()) + return; kmp_info_t *th = __kmp_threads[gtid]; kmp_root_t *r = th->th.th_root; if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) { diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index e9d0b99f6417e..43bf79403f44a 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -675,7 +675,11 @@ void kmp_topology_t::print(const char *env_var) const { kmp_hw_t print_types[KMP_HW_LAST + 2]; // Num Available Threads - KMP_INFORM(AvailableOSProc, env_var, num_hw_threads); + if (num_hw_threads) { + KMP_INFORM(AvailableOSProc, env_var, num_hw_threads); + } else { + KMP_INFORM(AvailableOSProc, env_var, __kmp_xproc); + } // Uniform or not if (is_uniform()) { @@ -3062,7 +3066,8 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, } // Skip this proc if it is not included in the machine model. 
- if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], + if (KMP_AFFINITY_CAPABLE() && + !KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) { INIT_PROC_INFO(threadInfo[num_avail]); continue; @@ -4525,6 +4530,9 @@ void __kmp_affinity_uninitialize(void) { *affinity = KMP_AFFINITY_INIT(affinity->env_var); } if (__kmp_affin_origMask != NULL) { + if (KMP_AFFINITY_CAPABLE()) { + __kmp_set_system_affinity(__kmp_affin_origMask, FALSE); + } KMP_CPU_FREE(__kmp_affin_origMask); __kmp_affin_origMask = NULL; } diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 080f4015b6e06..88eb150fc0c2a 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -6296,6 +6296,15 @@ void __kmp_env_initialize(char const *string) { __kmp_affinity_top_method = affinity_top_method_all; } } + } else { + // If affinity is disabled, then still need to assign topology method + // to attempt machine detection and affinity types + if (__kmp_affinity_top_method == affinity_top_method_default) + __kmp_affinity_top_method = affinity_top_method_all; + if (__kmp_affinity.type == affinity_default) + __kmp_affinity.type = affinity_disabled; + if (__kmp_hh_affinity.type == affinity_default) + __kmp_hh_affinity.type = affinity_disabled; } #ifdef KMP_DEBUG diff --git a/openmp/runtime/test/affinity/disabled.c b/openmp/runtime/test/affinity/disabled.c new file mode 100644 index 0000000000000..18261010a096a --- /dev/null +++ b/openmp/runtime/test/affinity/disabled.c @@ -0,0 +1,25 @@ +// RUN: %libomp-compile +// RUN: env KMP_AFFINITY=disabled %libomp-run +// RUN: env KMP_AFFINITY=disabled,reset %libomp-run +// REQUIRES: affinity +#include +#include +#include + +int main() { + int nthreads, correct_value;; + int a = 0; + #pragma omp parallel reduction(+: a) + { + a += omp_get_thread_num(); + #pragma omp single + nthreads = omp_get_num_threads(); + } + correct_value = nthreads * (nthreads - 1) / 2; + if (a != correct_value) 
{ + printf("Incorrect value: %d should be %d\n", a, correct_value); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + From b7d7e96006c5c037bd0c8dd7d2a5f7d76c4aeade Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 2 Nov 2022 15:55:51 -0400 Subject: [PATCH 080/516] [VectorCombine] add tests for load+shuffle and update to typeless ptr; NFC --- .../VectorCombine/X86/load-widening.ll | 236 ++++++++++++------ 1 file changed, 162 insertions(+), 74 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll index abc1a194bb8f2..384c6fa474a00 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll @@ -8,72 +8,72 @@ ; Here we know we can load 128 bits as per dereferenceability and alignment. ; We don't widen scalar loads per-se. -define <1 x float> @scalar(<1 x float>* align 16 dereferenceable(16) %p) { +define <1 x float> @scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @scalar( -; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <1 x float> [[R]] ; - %r = load <1 x float>, <1 x float>* %p, align 16 + %r = load <1 x float>, ptr %p, align 16 ret <1 x float> %r } ; We don't widen single-element loads, these get scalarized. 
-define <1 x float> @vec_with_1elt(<1 x float>* align 16 dereferenceable(16) %p) { +define <1 x float> @vec_with_1elt(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_1elt( -; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <1 x float> [[R]] ; - %r = load <1 x float>, <1 x float>* %p, align 16 + %r = load <1 x float>, ptr %p, align 16 ret <1 x float> %r } -define <2 x float> @vec_with_2elts(<2 x float>* align 16 dereferenceable(16) %p) { +define <2 x float> @vec_with_2elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = load <2 x float>, <2 x float>* %p, align 16 + %r = load <2 x float>, ptr %p, align 16 ret <2 x float> %r } -define <3 x float> @vec_with_3elts(<3 x float>* align 16 dereferenceable(16) %p) { +define <3 x float> @vec_with_3elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_3elts( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 16 + %r = load <3 x float>, ptr %p, align 16 ret <3 x float> %r } ; Full-vector load. All good already. 
-define <4 x float> @vec_with_4elts(<4 x float>* align 16 dereferenceable(16) %p) { +define <4 x float> @vec_with_4elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_4elts( -; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %r = load <4 x float>, <4 x float>* %p, align 16 + %r = load <4 x float>, ptr %p, align 16 ret <4 x float> %r } ; We don't know we can load 256 bits though. -define <5 x float> @vec_with_5elts(<5 x float>* align 16 dereferenceable(16) %p) { +define <5 x float> @vec_with_5elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_5elts( -; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <5 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <5 x float> [[R]] ; - %r = load <5 x float>, <5 x float>* %p, align 16 + %r = load <5 x float>, ptr %p, align 16 ret <5 x float> %r } ;------------------------------------------------------------------------------- ; We can load 128 bits, and the fact that it's underaligned isn't relevant. -define <3 x float> @vec_with_3elts_underaligned(<3 x float>* align 8 dereferenceable(16) %p) { +define <3 x float> @vec_with_3elts_underaligned(ptr align 8 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_3elts_underaligned( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 8 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 8 + %r = load <3 x float>, ptr %p, align 8 ret <3 x float> %r } @@ -81,145 +81,145 @@ define <3 x float> @vec_with_3elts_underaligned(<3 x float>* align 8 dereference ; FIXME: this should still get widened. 
define <3 x float> @vec_with_3elts_underdereferenceable(<3 x float>* align 16 dereferenceable(12) %p) { ; CHECK-LABEL: @vec_with_3elts_underdereferenceable( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 16 + %r = load <3 x float>, ptr %p, align 16 ret <3 x float> %r } ; We can't tell if we can load 128 bits. -define <3 x float> @vec_with_3elts_underaligned_underdereferenceable(<3 x float>* align 8 dereferenceable(12) %p) { +define <3 x float> @vec_with_3elts_underaligned_underdereferenceable(ptr align 8 dereferenceable(12) %p) { ; CHECK-LABEL: @vec_with_3elts_underaligned_underdereferenceable( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 8 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 8 + %r = load <3 x float>, ptr %p, align 8 ret <3 x float> %r } ;------------------------------------------------------------------------------- ; Here we know we can load 256 bits as per dereferenceability and alignment. 
-define <1 x float> @vec_with_1elt_256bits(<1 x float>* align 32 dereferenceable(32) %p) { +define <1 x float> @vec_with_1elt_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_1elt_256bits( -; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <1 x float> [[R]] ; - %r = load <1 x float>, <1 x float>* %p, align 32 + %r = load <1 x float>, ptr %p, align 32 ret <1 x float> %r } -define <2 x float> @vec_with_2elts_256bits(<2 x float>* align 32 dereferenceable(32) %p) { +define <2 x float> @vec_with_2elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_2elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = load <2 x float>, <2 x float>* %p, align 32 + %r = load <2 x float>, ptr %p, align 32 ret <2 x float> %r } -define <3 x float> @vec_with_3elts_256bits(<3 x float>* align 32 dereferenceable(32) %p) { +define <3 x float> @vec_with_3elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_3elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 32 + %r = load <3 x float>, ptr %p, align 32 ret <3 x float> %r } -define <4 x float> @vec_with_4elts_256bits(<4 x float>* align 32 dereferenceable(32) %p) { +define <4 x float> @vec_with_4elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_4elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <4 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %r = load <4 x float>, <4 x float>* %p, align 32 + %r = 
load <4 x float>, ptr %p, align 32 ret <4 x float> %r } -define <5 x float> @vec_with_5elts_256bits(<5 x float>* align 32 dereferenceable(32) %p) { +define <5 x float> @vec_with_5elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_5elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <5 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <5 x float> [[R]] ; - %r = load <5 x float>, <5 x float>* %p, align 32 + %r = load <5 x float>, ptr %p, align 32 ret <5 x float> %r } -define <6 x float> @vec_with_6elts_256bits(<6 x float>* align 32 dereferenceable(32) %p) { +define <6 x float> @vec_with_6elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_6elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <6 x float>, <6 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <6 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <6 x float> [[R]] ; - %r = load <6 x float>, <6 x float>* %p, align 32 + %r = load <6 x float>, ptr %p, align 32 ret <6 x float> %r } -define <7 x float> @vec_with_7elts_256bits(<7 x float>* align 32 dereferenceable(32) %p) { +define <7 x float> @vec_with_7elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_7elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <7 x float>, <7 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <7 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <7 x float> [[R]] ; - %r = load <7 x float>, <7 x float>* %p, align 32 + %r = load <7 x float>, ptr %p, align 32 ret <7 x float> %r } ; Full-vector load. All good already. 
-define <8 x float> @vec_with_8elts_256bits(<8 x float>* align 32 dereferenceable(32) %p) { +define <8 x float> @vec_with_8elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_8elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <8 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <8 x float> [[R]] ; - %r = load <8 x float>, <8 x float>* %p, align 32 + %r = load <8 x float>, ptr %p, align 32 ret <8 x float> %r } ; We can't tell if we can load more than 256 bits. -define <9 x float> @vec_with_9elts_256bits(<9 x float>* align 32 dereferenceable(32) %p) { +define <9 x float> @vec_with_9elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_9elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <9 x float>, <9 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <9 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <9 x float> [[R]] ; - %r = load <9 x float>, <9 x float>* %p, align 32 + %r = load <9 x float>, ptr %p, align 32 ret <9 x float> %r } ;------------------------------------------------------------------------------- ; Weird types we don't deal with -define <2 x i7> @vec_with_two_subbyte_elts(<2 x i7>* align 16 dereferenceable(16) %p) { +define <2 x i7> @vec_with_two_subbyte_elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_two_subbyte_elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x i7>, <2 x i7>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i7>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i7> [[R]] ; - %r = load <2 x i7>, <2 x i7>* %p, align 16 + %r = load <2 x i7>, ptr %p, align 16 ret <2 x i7> %r } -define <2 x i9> @vec_with_two_nonbyte_sized_elts(<2 x i9>* align 16 dereferenceable(16) %p) { +define <2 x i9> @vec_with_two_nonbyte_sized_elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_two_nonbyte_sized_elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x i9>, <2 x i9>* [[P:%.*]], align 16 +; 
CHECK-NEXT: [[R:%.*]] = load <2 x i9>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i9> [[R]] ; - %r = load <2 x i9>, <2 x i9>* %p, align 16 + %r = load <2 x i9>, ptr %p, align 16 ret <2 x i9> %r } -define <2 x i24> @vec_with_two_nonpoweroftwo_sized_elts(<2 x i24>* align 16 dereferenceable(16) %p) { +define <2 x i24> @vec_with_two_nonpoweroftwo_sized_elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_two_nonpoweroftwo_sized_elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x i24>, <2 x i24>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i24>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i24> [[R]] ; - %r = load <2 x i24>, <2 x i24>* %p, align 16 + %r = load <2 x i24>, ptr %p, align 16 ret <2 x i24> %r } -define <2 x float> @vec_with_2elts_addressspace(<2 x float> addrspace(2)* align 16 dereferenceable(16) %p) { +define <2 x float> @vec_with_2elts_addressspace(ptr addrspace(2) align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_addressspace( -; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float> addrspace(2)* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr addrspace(2) [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = load <2 x float>, <2 x float> addrspace(2)* %p, align 16 + %r = load <2 x float>, ptr addrspace(2) %p, align 16 ret <2 x float> %r } @@ -227,27 +227,115 @@ define <2 x float> @vec_with_2elts_addressspace(<2 x float> addrspace(2)* align ; Widening these would change the legalized type, so leave them alone. 
-define <2 x i1> @vec_with_2elts_128bits_i1(<2 x i1>* align 16 dereferenceable(16) %p) { +define <2 x i1> @vec_with_2elts_128bits_i1(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_128bits_i1( -; CHECK-NEXT: [[R:%.*]] = load <2 x i1>, <2 x i1>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i1>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %r = load <2 x i1>, <2 x i1>* %p, align 16 + %r = load <2 x i1>, ptr %p, align 16 ret <2 x i1> %r } -define <2 x i2> @vec_with_2elts_128bits_i2(<2 x i2>* align 16 dereferenceable(16) %p) { +define <2 x i2> @vec_with_2elts_128bits_i2(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_128bits_i2( -; CHECK-NEXT: [[R:%.*]] = load <2 x i2>, <2 x i2>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i2>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i2> [[R]] ; - %r = load <2 x i2>, <2 x i2>* %p, align 16 + %r = load <2 x i2>, ptr %p, align 16 ret <2 x i2> %r } -define <2 x i4> @vec_with_2elts_128bits_i4(<2 x i4>* align 16 dereferenceable(16) %p) { +define <2 x i4> @vec_with_2elts_128bits_i4(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_128bits_i4( -; CHECK-NEXT: [[R:%.*]] = load <2 x i4>, <2 x i4>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i4>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i4> [[R]] ; - %r = load <2 x i4>, <2 x i4>* %p, align 16 + %r = load <2 x i4>, ptr %p, align 16 ret <2 x i4> %r } + +define <4 x float> @load_v1f32_v4f32(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v1f32_v4f32( +; CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[S:%.*]] = shufflevector <1 x float> [[L]], <1 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <1 x float>, ptr %p, align 16 + %s = shufflevector <1 x float> %l, <1 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v2f32_v4f32(ptr align 16 dereferenceable(16) %p) { +; 
CHECK-LABEL: @load_v2f32_v4f32( +; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <2 x float>, ptr %p, align 1 + %s = shufflevector <2 x float> %l, <2 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v3f32_v4f32(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v3f32_v4f32( +; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <3 x float>, ptr %p, align 1 + %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v3f32_v4f32_wrong_mask(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v3f32_v4f32_wrong_mask( +; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <3 x float>, ptr %p, align 1 + %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v3f32_v4f32_not_deref(ptr dereferenceable(15) %p) { +; CHECK-LABEL: @load_v3f32_v4f32_not_deref( +; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <3 x float>, ptr %p, align 16 + %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <8 x float> @load_v2f32_v8f32(ptr dereferenceable(32) %p) { +; CHECK-LABEL: @load_v2f32_v8f32( +; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: ret <8 x 
float> [[S]] +; + %l = load <2 x float>, ptr %p, align 1 + %s = shufflevector <2 x float> %l, <2 x float> poison, <8 x i32> + ret <8 x float> %s +} + +define <4 x i32> @load_v2i32_v4i32(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v2i32_v4i32( +; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S]] +; + %l = load <2 x i32>, ptr %p, align 1 + %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> + ret <4 x i32> %s +} + +define <4 x i32> @load_v2i32_v4i32_non_canonical_mask(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask( +; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S]] +; + %l = load <2 x i32>, ptr %p, align 1 + %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> + ret <4 x i32> %s +} From f4be5ed6a3fef0b2b0c60b29e1c0638926638d28 Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Wed, 2 Nov 2022 13:40:52 -0700 Subject: [PATCH 081/516] [clang][pp] only __is_target_environment(unknown) should match unknown target triple environment --- clang/lib/Lex/PPMacroExpansion.cpp | 5 +++++ .../Preprocessor/is_target_unknown_environment.c | 15 +++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 clang/test/Preprocessor/is_target_unknown_environment.c diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index e79d76cbf3857..ffc758d47c7a2 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1447,6 +1447,11 @@ static bool isTargetEnvironment(const TargetInfo &TI, const IdentifierInfo *II) { std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str(); llvm::Triple Env(EnvName); + // The unknown environment is matched only if + // '__is_target_environment(unknown)' 
is used. + if (Env.getEnvironment() == llvm::Triple::UnknownEnvironment && + EnvName != "---unknown") + return false; return TI.getTriple().getEnvironment() == Env.getEnvironment(); } diff --git a/clang/test/Preprocessor/is_target_unknown_environment.c b/clang/test/Preprocessor/is_target_unknown_environment.c new file mode 100644 index 0000000000000..9462ef442fecd --- /dev/null +++ b/clang/test/Preprocessor/is_target_unknown_environment.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-macos12 -verify %s + +// expected-no-diagnostics + +#if !__is_target_environment(unknown) +#error "mismatching environment" +#endif + +#if __is_target_environment(simulator) || __is_target_environment(SIMULATOR) +#error "mismatching environment" +#endif + +#if __is_target_environment(invalidEnv) +#error "invalid environment must not be matched" +#endif From b6ad7ab89ef5e1772e90248ee728fd955089c949 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Nov 2022 13:09:52 -0700 Subject: [PATCH 082/516] [RISCV] Prevent autovectorization using vscale with Zvl32b. RVVBitsPerBlock is 64. If VLen==32, VLen/RVVBitsPerBlock is 0. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D137280 --- clang/lib/Basic/Targets/RISCV.cpp | 3 +- .../CodeGen/riscv-vector-bits-vscale-range.c | 3 + .../Target/RISCV/RISCVTargetTransformInfo.cpp | 9 ++- .../Transforms/LoopVectorize/RISCV/zvl32b.ll | 69 +++++++++++++++++++ 4 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 36fa962749820..f6db5b8aaf45d 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -252,7 +252,8 @@ RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts) const { return std::pair( LangOpts.VScaleMin ? 
LangOpts.VScaleMin : 1, LangOpts.VScaleMax); - if (unsigned MinVLen = ISAInfo->getMinVLen()) { + if (unsigned MinVLen = ISAInfo->getMinVLen(); + MinVLen >= llvm::RISCV::RVVBitsPerBlock) { unsigned MaxVLen = ISAInfo->getMaxVLen(); // RISCV::RVVBitsPerBlock is 64. return std::make_pair(MinVLen / llvm::RISCV::RVVBitsPerBlock, diff --git a/clang/test/CodeGen/riscv-vector-bits-vscale-range.c b/clang/test/CodeGen/riscv-vector-bits-vscale-range.c index 9fbb9795657b3..ed391f5d04e56 100644 --- a/clang/test/CodeGen/riscv-vector-bits-vscale-range.c +++ b/clang/test/CodeGen/riscv-vector-bits-vscale-range.c @@ -14,6 +14,7 @@ // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64x -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE64 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64f -target-feature +f -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE64 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d -target-feature +f -target-feature +d -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE64 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve32x -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE32 // CHECK-LABEL: @func() #0 // CHECK: attributes #0 = { {{.*}} vscale_range([[#VBITS]],[[#VBITS]]) {{.*}} } @@ -22,4 +23,6 @@ // CHECK-V: attributes #0 = { {{.*}} vscale_range(2,1024) {{.*}} } // CHECK-ZVL: attributes #0 = { {{.*}} vscale_range(8,1024) {{.*}} } // CHECK-ZVE64: attributes #0 = { {{.*}} vscale_range(1,1024) {{.*}} } +// CHECK-ZVE32: attributes #0 +// CHECK-ZVE32-NOT: vscale_range void func(void) {} diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index d44f6a647caa3..afb21b868cecf 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -153,7 +153,9 @@ Optional RISCVTTIImpl::getMaxVScale() const { Optional 
RISCVTTIImpl::getVScaleForTuning() const { if (ST->hasVInstructions()) - return ST->getRealMinVLen() / RISCV::RVVBitsPerBlock; + if (unsigned MinVLen = ST->getRealMinVLen(); + MinVLen >= RISCV::RVVBitsPerBlock) + return MinVLen / RISCV::RVVBitsPerBlock; return BaseT::getVScaleForTuning(); } @@ -169,7 +171,10 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0); case TargetTransformInfo::RGK_ScalableVector: return TypeSize::getScalable( - ST->hasVInstructions() ? LMUL * RISCV::RVVBitsPerBlock : 0); + (ST->hasVInstructions() && + ST->getRealMinVLen() >= RISCV::RVVBitsPerBlock) + ? LMUL * RISCV::RVVBitsPerBlock + : 0); } llvm_unreachable("Unsupported register kind"); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll new file mode 100644 index 0000000000000..e4dd8479cdc5d --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+zve32f,+f -S 2>%t | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +target triple = "riscv64" + +; We can't use scalable vectorization for Zvl32b due to RVVBitsPerBlock being +; 64. Since our vscale value is vlen/RVVBitsPerBlock this makes vscale 0. +; Make sure we fall back to fixed vectorization instead. 
+define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) { +; CHECK-LABEL: @vector_add_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[V:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i16> poison, i16 [[V]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT3]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <2 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], <2 x i64> [[STEP_ADD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP0]], i32 2, <2 x i1> , <2 x i16> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP1]], i32 2, <2 x i1> , <2 x i16> poison) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i16> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[WIDE_MASKED_GATHER2]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP2]], <2 x ptr> [[TMP0]], i32 2, <2 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP3]], <2 x ptr> [[TMP1]], i32 2, <2 x i1> ) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: 
[[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[ELEM:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ADD:%.*]] = add i16 [[ELEM]], [[V]] +; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %elem = load i16, ptr %arrayidx + %add = add i16 %elem, %v + store i16 %add, ptr %arrayidx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} From cf239c2f1777eb94a4801a086acf1332a7d3cccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Cloutier?= Date: Fri, 9 Sep 2022 14:08:19 -0700 Subject: [PATCH 083/516] [NFC] Make format() more amenable to format attributes This change modifies the implementation of the format() function so that vendor forks committed to building with compilers that support __attribute__((format)) on 
non-variadic functions can check the format() function with it. rdar://84571523 --- llvm/include/llvm/Support/Format.h | 85 +++- llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/Format.cpp | 370 ++++++++++++++++++ llvm/lib/TableGen/SetTheory.cpp | 17 +- llvm/unittests/Support/CMakeLists.txt | 1 + llvm/unittests/Support/FormatChkTest.cpp | 314 +++++++++++++++ llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp | 9 +- 7 files changed, 770 insertions(+), 27 deletions(-) create mode 100644 llvm/lib/Support/Format.cpp create mode 100644 llvm/unittests/Support/FormatChkTest.cpp diff --git a/llvm/include/llvm/Support/Format.h b/llvm/include/llvm/Support/Format.h index 9dd7b401b46a2..386f97488b5ca 100644 --- a/llvm/include/llvm/Support/Format.h +++ b/llvm/include/llvm/Support/Format.h @@ -33,11 +33,63 @@ namespace llvm { +/// Utility class that parses printf-style format strings to yield the expected +/// C type(s) of each specifier. This class is used to verify that a format +/// string unknown at compile-time is equivalent to another format string (which +/// itself is hopefully known at compile-time). +class PrintfStyleFormatReader { +public: + enum SpecifierType : char { + ST_EndOfFormatString, + ST_Unknown, + ST_WideChar, + ST_Int, + ST_Long, + ST_LongLong, + ST_IntMax, + ST_Size, + ST_Ptrdiff, + ST_Double, + ST_LongDouble, + ST_CString, + ST_WideCString, + ST_VoidPointer, + ST_Count_Char, + ST_Count_Short, + ST_Count_Int, + ST_Count_Long, + ST_Count_LongLong, + ST_Count_IntMax, + ST_Count_Size, + ST_Count_Ptrdiff + }; + +private: + const char *Fmt; + llvm::SmallVector SpecifierQueue; + + void refillSpecifierQueue(); + +public: + /// Verify that the format specifiers in \p Fmt consume no more arguments than + /// those in \p Expected, and that all consumed arguments have a compatible + /// type. If \p Fmt is compatible with \p Expected in this way, \p Fmt is + /// returned. Otherwise, \p Expected is returned. 
+ static const char *ensureCompatible(const char *Expected, const char *Fmt); + + PrintfStyleFormatReader(const char *Fmt) : Fmt(Fmt) {} + + SpecifierType nextSpecifier() { + if (SpecifierQueue.empty()) + refillSpecifierQueue(); + return SpecifierQueue.pop_back_val(); + } +}; + /// This is a helper class used for handling formatted output. It is the /// abstract base class of a templated derived class. class format_object_base { protected: - const char *Fmt; ~format_object_base() = default; // Disallow polymorphic deletion. format_object_base(const format_object_base &) = default; virtual void home(); // Out of line virtual method. @@ -46,7 +98,7 @@ class format_object_base { virtual int snprint(char *Buffer, unsigned BufferSize) const = 0; public: - format_object_base(const char *fmt) : Fmt(fmt) {} + format_object_base() = default; /// Format the object into the specified buffer. On success, this returns /// the length of the formatted string. If the buffer is too small, this @@ -86,28 +138,27 @@ struct validate_format_parameters { }; template <> struct validate_format_parameters<> {}; -template -class format_object final : public format_object_base { - std::tuple Vals; - - template - int snprint_tuple(char *Buffer, unsigned BufferSize, - std::index_sequence) const { +template auto format_capture(const char *Fmt, Ts... Vals) { + validate_format_parameters(); + return [=](char *Buffer, unsigned BufferSize) { #ifdef _MSC_VER - return _snprintf(Buffer, BufferSize, Fmt, std::get(Vals)...); + return _snprintf(Buffer, BufferSize, Fmt, Vals...); #else - return snprintf(Buffer, BufferSize, Fmt, std::get(Vals)...); + return snprintf(Buffer, BufferSize, Fmt, Vals...); #endif - } + }; +} + +template +class format_object final : public format_object_base { + decltype(format_capture("", std::declval()...)) Format; public: - format_object(const char *fmt, const Ts &... vals) - : format_object_base(fmt), Vals(vals...) 
{ - validate_format_parameters(); - } + format_object(const char *Fmt, const Ts &...vals) + : Format(format_capture(Fmt, vals...)) {} int snprint(char *Buffer, unsigned BufferSize) const override { - return snprint_tuple(Buffer, BufferSize, std::index_sequence_for()); + return Format(Buffer, BufferSize); } }; diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index c6a04fdb66599..bc19b5be21409 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -172,6 +172,7 @@ add_llvm_component_library(LLVMSupport FileUtilities.cpp FileOutputBuffer.cpp FoldingSet.cpp + Format.cpp FormattedStream.cpp FormatVariadic.cpp GlobPattern.cpp diff --git a/llvm/lib/Support/Format.cpp b/llvm/lib/Support/Format.cpp new file mode 100644 index 0000000000000..45b279915afac --- /dev/null +++ b/llvm/lib/Support/Format.cpp @@ -0,0 +1,370 @@ +//===- Format.cpp - Efficient printf-style formatting for streams -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the non-template part of Format.h, which is used to +// provide a type-safe-ish interface to printf-style formatting. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Format.h" + +namespace { +/// Enum representation of a printf-style length specifier. +enum ArgLength : char { + /// Corresponds to 'hh' length specifier. + AL_ShortShort, + /// Corresponds to 'h' length specifier. + AL_Short, + /// Corresponds to default length specifier. + AL_Default, + /// Corresponds to 'l' length specifier. + AL_Long, + /// Corresponds to 'll' length specifier. + AL_LongLong, + /// Corresponds to 'j' length specifier. 
+ AL_IntMax, + /// Corresponds to 'z' length specifier. + AL_Size, + /// Corresponds to 't' length specifier. + AL_Ptrdiff, + /// Corresponds to 'L' length specifier. + AL_LongDouble, + /// First invalid value of \p ArgLength. + AL_End, +}; + +/// Enum representation of a printf-style specifier. +enum SpecifierChar : char { + /// Corresponds to any of 'd', 'i', 'u', 'o', 'x' or 'X' specifiers. + SC_Int, + /// Corresponds to any of 'f', 'F', 'e', 'E', 'g', 'G', 'a' or 'A' specifiers. + SC_Float, + /// Corresponds to 'c' specifier. + SC_Char, + /// Corresponds to 's' specifier. + SC_String, + /// Corresponds to 'p' specifier. + SC_VoidPointer, + /// Corresponds to 'n' specifier. + SC_Count, + /// First invalid value of \p SpecifierChar. + SC_End, +}; + +constexpr uint64_t specifierBit(char C) { + // specifierMask builds a bit map where each set bit indicates that the + // character whose ASCII value is 64 + would be legal to use + // as a format specifier in the current parsing context. + // To cover all ASCII characters, we would need 128 bits; however, the only + // character with an ASCII value less than 64 that can be used as a specifier + // is % (as in %%), so we save some space and complexity by dropping the + // lower half of the bit map, which is going to be all zeroes anyway. + // % is handled as a special case. 
+ return (uint64_t)1 << (C - 64); +} + +template +constexpr /* consteval */ uint64_t specifierMask(const char (&Specifiers)[N]) { + uint64_t Mask = 0; + for (const char *I = std::begin(Specifiers); I != std::end(Specifiers); ++I) { + if (*I == 0) + break; + Mask |= specifierBit(*I); + } + return Mask; +} + +constexpr auto ST_Unknown = llvm::PrintfStyleFormatReader::ST_Unknown; +constexpr auto ST_WideChar = llvm::PrintfStyleFormatReader::ST_WideChar; +constexpr auto ST_Int = llvm::PrintfStyleFormatReader::ST_Int; +constexpr auto ST_Long = llvm::PrintfStyleFormatReader::ST_Long; +constexpr auto ST_LongLong = llvm::PrintfStyleFormatReader::ST_LongLong; +constexpr auto ST_IntMax = llvm::PrintfStyleFormatReader::ST_IntMax; +constexpr auto ST_Size = llvm::PrintfStyleFormatReader::ST_Size; +constexpr auto ST_Ptrdiff = llvm::PrintfStyleFormatReader::ST_Ptrdiff; +constexpr auto ST_Double = llvm::PrintfStyleFormatReader::ST_Double; +constexpr auto ST_LongDouble = llvm::PrintfStyleFormatReader::ST_LongDouble; +constexpr auto ST_CString = llvm::PrintfStyleFormatReader::ST_CString; +constexpr auto ST_WideCString = llvm::PrintfStyleFormatReader::ST_WideCString; +constexpr auto ST_VoidPointer = llvm::PrintfStyleFormatReader::ST_VoidPointer; +constexpr auto ST_Count_Char = llvm::PrintfStyleFormatReader::ST_Count_Char; +constexpr auto ST_Count_Short = llvm::PrintfStyleFormatReader::ST_Count_Short; +constexpr auto ST_Count_Int = llvm::PrintfStyleFormatReader::ST_Count_Int; +constexpr auto ST_Count_Long = llvm::PrintfStyleFormatReader::ST_Count_Long; +constexpr auto ST_Count_LongLong = + llvm::PrintfStyleFormatReader::ST_Count_LongLong; +constexpr auto ST_Count_IntMax = llvm::PrintfStyleFormatReader::ST_Count_IntMax; +constexpr auto ST_Count_Size = llvm::PrintfStyleFormatReader::ST_Count_Size; +constexpr auto ST_Count_Ptrdiff = + llvm::PrintfStyleFormatReader::ST_Count_Ptrdiff; + +llvm::PrintfStyleFormatReader::SpecifierType SpecifierTable[SC_End][AL_End] = { + { + // SC_Int + 
ST_Int, + ST_Int, + ST_Int, + ST_Long, + ST_LongLong, + ST_IntMax, + ST_Size, + ST_Ptrdiff, + ST_Unknown, + }, + { + // SC_Float + ST_Unknown, + ST_Unknown, + ST_Double, + ST_Double, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_LongDouble, + }, + { + // SC_Char + ST_Unknown, + ST_Unknown, + ST_Int, + ST_WideChar, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + }, + { + // SC_String + ST_Unknown, + ST_Unknown, + ST_CString, + ST_WideCString, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + }, + { + // SC_VoidPointer + ST_Unknown, + ST_Unknown, + ST_VoidPointer, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + }, + { + // SC_Count + ST_Count_Char, + ST_Count_Short, + ST_Count_Int, + ST_Count_Long, + ST_Count_LongLong, + ST_Count_IntMax, + ST_Count_Size, + ST_Count_Ptrdiff, + ST_Unknown, + }, +}; +} // namespace + +namespace llvm { + +/// Scans Fmt for the next '%' conversion and queues the argument types it +/// consumes: a '*' width or precision contributes ST_Int ahead of the type of +/// the conversion itself. Queues ST_EndOfFormatString when no '%' is left, and +/// a single ST_Unknown when the conversion is malformed or not supported. +void PrintfStyleFormatReader::refillSpecifierQueue() { + if (auto PercentPtr = strchr(Fmt, '%')) { + Fmt = PercentPtr; + } else { + SpecifierQueue.push_back(ST_EndOfFormatString); + return; + } + + if (*++Fmt == '%') { + // %% case: skip and try again + ++Fmt; + refillSpecifierQueue(); + return; + } + + // Push ST_Unknown to SpecifierQueue. If we bail out early, this is what + // the caller gets. Fill in real specifiers to Specifiers: if we + // successfully get to the end, then swap Specifiers with SpecifierQueue. + SpecifierQueue.push_back(ST_Unknown); + llvm::SmallVector Specifiers; + + // Bitfield keeping track of which specifier characters are allowed given + // flags and precision settings. Each bit tells whether ascii character + // 0x40 + (bit index) is allowed as a specifier. '%', which has an ASCII value + // less than 0x40 and does not allow any customization, is handled by a check + // above. The starting value contains all standard specifiers. 
+ uint64_t ValidSpecifiers = specifierMask("diuoxXfFeEgGaAcspn"); + + // update specifier mask based on flags + bool ReadAllFlags = false; + while (!ReadAllFlags) { + switch (*Fmt) { + case '+': + case '-': + case ' ': + // valid for all specifiers + ++Fmt; + break; + case '#': + ValidSpecifiers &= specifierMask("xXaAeEfFgG"); + ++Fmt; + break; + case '0': + ValidSpecifiers &= specifierMask("diouxXaAeEfFgG"); + ++Fmt; + break; + default: + ReadAllFlags = true; + break; + } + } + + // skip width + if (*Fmt == '*') { + Specifiers.push_back(ST_Int); + ++Fmt; + } else + while (*Fmt >= '0' && *Fmt <= '9') + ++Fmt; + + // test precision + if (*Fmt == '.') { + ValidSpecifiers &= specifierMask("diouxXaAeEfFgGs"); + ++Fmt; + if (*Fmt == '*') { + Specifiers.push_back(ST_Int); + ++Fmt; + } else + while (*Fmt >= '0' && *Fmt <= '9') + ++Fmt; + } + + // parse length + bool FoundLength = false; + ArgLength AL = AL_Default; + while (!FoundLength) { + ArgLength NewAL; + switch (*Fmt) { + case 'h': + NewAL = AL_Short; + break; + case 'l': + NewAL = AL_Long; + break; + case 'j': + NewAL = AL_IntMax; + break; + case 'z': + NewAL = AL_Size; + break; + case 't': + NewAL = AL_Ptrdiff; + break; + case 'L': + NewAL = AL_LongDouble; + break; + default: + FoundLength = true; + continue; + } + + if (NewAL == AL_Long && AL == AL_Long) + AL = AL_LongLong; + else if (NewAL == AL_Short && AL == AL_Short) + AL = AL_ShortShort; + else if (AL == AL_Default) + AL = NewAL; + else + return; + ++Fmt; + } + + // parse specifier; verify that the character is a valid specifier given + // restrictions imposed by the use of flags and precision values + char Next = *Fmt; + if (Next == 0) + return; + + ++Fmt; + // Only 0x40..0x7F have a bit in the 64-bit mask. Testing the top two bits + // also rejects bytes >= 0x80 where plain char is unsigned; those would + // otherwise make specifierBit() shift by 64 or more, which is undefined. + if ((Next & 0xC0) != 0x40 || (specifierBit(Next) & ValidSpecifiers) == 0) + return; + + SpecifierChar SC; + switch (Next) { + case 'd': + case 'i': + case 'u': + case 'o': + case 'x': + case 'X': + SC = SC_Int; + break; + + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + 
case 'G': + SC = SC_Float; + break; + + case 'c': + SC = SC_Char; + break; + + case 's': + SC = SC_String; + break; + + case 'p': + SC = SC_VoidPointer; + break; + + case 'n': + SC = SC_Count; + break; + + default: + return; + } + + auto Spec = SpecifierTable[SC][AL]; + if (Spec == ST_Unknown) + return; + + Specifiers.push_back(Spec); + std::reverse(Specifiers.begin(), Specifiers.end()); + std::swap(Specifiers, SpecifierQueue); +} + +/// Returns Fmt when it yields exactly the same specifier sequence as Expected, +/// and Expected otherwise, so callers can format with the result using the +/// arguments Expected was written for. +const char *PrintfStyleFormatReader::ensureCompatible(const char *Expected, + const char *Fmt) { + PrintfStyleFormatReader EFR(Expected); + PrintfStyleFormatReader FFR(Fmt); + SpecifierType EST; + do { + EST = EFR.nextSpecifier(); + if (EST != FFR.nextSpecifier()) + return Expected; + } while (EST); + return Fmt; +} + +} // namespace llvm diff --git a/llvm/lib/TableGen/SetTheory.cpp b/llvm/lib/TableGen/SetTheory.cpp index 3db46aae6d967..34fdd35269164 100644 --- a/llvm/lib/TableGen/SetTheory.cpp +++ b/llvm/lib/TableGen/SetTheory.cpp @@ -210,21 +210,26 @@ struct SequenceOp : public SetTheory::Operator { PrintFatalError(Loc, "To out of range"); RecordKeeper &Records = - cast(Expr->getOperator())->getDef()->getRecords(); + cast(Expr->getOperator())->getDef()->getRecords(); Step *= From <= To ? 
1 : -1; + const char FallbackFmt[] = "%u"; while (true) { if (Step > 0 && From > To) break; else if (Step < 0 && From < To) break; + const char *const VerifiedFmt = PrintfStyleFormatReader::ensureCompatible( + FallbackFmt, Format.c_str()); + if (VerifiedFmt == FallbackFmt) + PrintFatalError(Loc, "Format string '" + Format + + "' is incompatible with '%u'!"); std::string Name; - raw_string_ostream OS(Name); - OS << format(Format.c_str(), unsigned(From)); - Record *Rec = Records.getDef(OS.str()); + raw_string_ostream(Name) << format(VerifiedFmt, unsigned(From)); + Record *Rec = Records.getDef(Name); if (!Rec) - PrintFatalError(Loc, "No def named '" + Name + "': " + - Expr->getAsString()); + PrintFatalError(Loc, + "No def named '" + Name + "': " + Expr->getAsString()); // Try to reevaluate Rec in case it is a set. if (const RecVec *Result = ST.expand(Rec)) Elts.insert(Result->begin(), Result->end()); diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 7ac6940f7ac47..b7a3d80019013 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_unittest(SupportTests FileCollectorTest.cpp FileOutputBufferTest.cpp FileUtilitiesTest.cpp + FormatChkTest.cpp FormatVariadicTest.cpp FSUniqueIDTest.cpp GlobPatternTest.cpp diff --git a/llvm/unittests/Support/FormatChkTest.cpp b/llvm/unittests/Support/FormatChkTest.cpp new file mode 100644 index 0000000000000..48023b8e48b81 --- /dev/null +++ b/llvm/unittests/Support/FormatChkTest.cpp @@ -0,0 +1,314 @@ +//===- FormatChkTest.cpp - Unit tests for checked string formatting -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Format.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; + +namespace { + +constexpr auto ST_Unknown = llvm::PrintfStyleFormatReader::ST_Unknown; +constexpr auto ST_WideChar = llvm::PrintfStyleFormatReader::ST_WideChar; +constexpr auto ST_Int = llvm::PrintfStyleFormatReader::ST_Int; +constexpr auto ST_Long = llvm::PrintfStyleFormatReader::ST_Long; +constexpr auto ST_LongLong = llvm::PrintfStyleFormatReader::ST_LongLong; +constexpr auto ST_IntMax = llvm::PrintfStyleFormatReader::ST_IntMax; +constexpr auto ST_Size = llvm::PrintfStyleFormatReader::ST_Size; +constexpr auto ST_Ptrdiff = llvm::PrintfStyleFormatReader::ST_Ptrdiff; +constexpr auto ST_Double = llvm::PrintfStyleFormatReader::ST_Double; +constexpr auto ST_LongDouble = llvm::PrintfStyleFormatReader::ST_LongDouble; +constexpr auto ST_CString = llvm::PrintfStyleFormatReader::ST_CString; +constexpr auto ST_WideCString = llvm::PrintfStyleFormatReader::ST_WideCString; +constexpr auto ST_VoidPointer = llvm::PrintfStyleFormatReader::ST_VoidPointer; +constexpr auto ST_Count_Char = llvm::PrintfStyleFormatReader::ST_Count_Char; +constexpr auto ST_Count_Short = llvm::PrintfStyleFormatReader::ST_Count_Short; +constexpr auto ST_Count_Int = llvm::PrintfStyleFormatReader::ST_Count_Int; +constexpr auto ST_Count_Long = llvm::PrintfStyleFormatReader::ST_Count_Long; +constexpr auto ST_Count_LongLong = + llvm::PrintfStyleFormatReader::ST_Count_LongLong; +constexpr auto ST_Count_IntMax = llvm::PrintfStyleFormatReader::ST_Count_IntMax; +constexpr auto ST_Count_Size = llvm::PrintfStyleFormatReader::ST_Count_Size; +constexpr auto ST_Count_Ptrdiff = + llvm::PrintfStyleFormatReader::ST_Count_Ptrdiff; + +using STVec = std::vector; + +STVec ParseFormatString(const char *Fmt) { + STVec Result; + PrintfStyleFormatReader Reader(Fmt); + while (auto Spec = 
Reader.nextSpecifier()) { + Result.push_back(Spec); + } + return Result; +} + +#define EXPECT_FMT_EQ(FMT, ...) \ + EXPECT_EQ(ParseFormatString(FMT), STVec({__VA_ARGS__})) + +} // namespace + +TEST(FormatReader, EmptyFormatString) { + EXPECT_EQ(ParseFormatString(""), + std::vector()); +} + +TEST(FormatReader, PercentEscape) { + EXPECT_EQ(ParseFormatString("%%"), + std::vector()); +} + +TEST(FormatReader, PercentAtEnd) { EXPECT_FMT_EQ("%", ST_Unknown); } + +TEST(FormatReader, PercentWithWidth) { EXPECT_FMT_EQ("%ll%", ST_Unknown); } + +TEST(FormatReader, OneFormat) { + EXPECT_FMT_EQ("%i xx", ST_Int); + EXPECT_FMT_EQ("yy %i", ST_Int); + EXPECT_FMT_EQ("yy %i xx", ST_Int); +} + +TEST(FormatReader, TwoFormats) { + EXPECT_FMT_EQ("%i yy %f xx", ST_Int, ST_Double); + EXPECT_FMT_EQ("zz %i yy %f", ST_Int, ST_Double); + EXPECT_FMT_EQ("zz %i yy %f xx", ST_Int, ST_Double); +} + +TEST(FormatReader, PoundFlagValid) { + EXPECT_FMT_EQ("%#x", ST_Int); + EXPECT_FMT_EQ("%#X", ST_Int); + EXPECT_FMT_EQ("%#a", ST_Double); + EXPECT_FMT_EQ("%#A", ST_Double); + EXPECT_FMT_EQ("%#e", ST_Double); + EXPECT_FMT_EQ("%#E", ST_Double); + EXPECT_FMT_EQ("%#f", ST_Double); + EXPECT_FMT_EQ("%#F", ST_Double); + EXPECT_FMT_EQ("%#g", ST_Double); + EXPECT_FMT_EQ("%#G", ST_Double); + + EXPECT_FMT_EQ("%#p", ST_Unknown); + EXPECT_FMT_EQ("%#i", ST_Unknown); + EXPECT_FMT_EQ("%#c", ST_Unknown); + EXPECT_FMT_EQ("%#s", ST_Unknown); + EXPECT_FMT_EQ("%#d", ST_Unknown); + EXPECT_FMT_EQ("%#u", ST_Unknown); + EXPECT_FMT_EQ("%#o", ST_Unknown); + EXPECT_FMT_EQ("%#n", ST_Unknown); +} + +TEST(FormatReader, ZeroFlagValid) { + EXPECT_FMT_EQ("%0x", ST_Int); + EXPECT_FMT_EQ("%0X", ST_Int); + EXPECT_FMT_EQ("%0i", ST_Int); + EXPECT_FMT_EQ("%0d", ST_Int); + EXPECT_FMT_EQ("%0u", ST_Int); + EXPECT_FMT_EQ("%0o", ST_Int); + EXPECT_FMT_EQ("%0a", ST_Double); + EXPECT_FMT_EQ("%0A", ST_Double); + EXPECT_FMT_EQ("%0e", ST_Double); + EXPECT_FMT_EQ("%0E", ST_Double); + EXPECT_FMT_EQ("%0f", ST_Double); + EXPECT_FMT_EQ("%0F", ST_Double); + 
EXPECT_FMT_EQ("%0g", ST_Double); + EXPECT_FMT_EQ("%0G", ST_Double); + + EXPECT_FMT_EQ("%0p", ST_Unknown); + EXPECT_FMT_EQ("%0n", ST_Unknown); + EXPECT_FMT_EQ("%0c", ST_Unknown); + EXPECT_FMT_EQ("%0s", ST_Unknown); +} + +TEST(FormatReader, PrecisionValid) { + EXPECT_FMT_EQ("%.1x", ST_Int); + EXPECT_FMT_EQ("%.1X", ST_Int); + EXPECT_FMT_EQ("%.1i", ST_Int); + EXPECT_FMT_EQ("%.1d", ST_Int); + EXPECT_FMT_EQ("%.1u", ST_Int); + EXPECT_FMT_EQ("%.1o", ST_Int); + EXPECT_FMT_EQ("%.1a", ST_Double); + EXPECT_FMT_EQ("%.1A", ST_Double); + EXPECT_FMT_EQ("%.1e", ST_Double); + EXPECT_FMT_EQ("%.1E", ST_Double); + EXPECT_FMT_EQ("%.1f", ST_Double); + EXPECT_FMT_EQ("%.1F", ST_Double); + EXPECT_FMT_EQ("%.1g", ST_Double); + EXPECT_FMT_EQ("%.1G", ST_Double); + EXPECT_FMT_EQ("%.1s", ST_CString); + + EXPECT_FMT_EQ("%.1p", ST_Unknown); + EXPECT_FMT_EQ("%.1n", ST_Unknown); + EXPECT_FMT_EQ("%.1c", ST_Unknown); +} + +TEST(FormatReader, LongWidth) { + EXPECT_FMT_EQ("%1li", ST_Long); + EXPECT_FMT_EQ("%11li", ST_Long); + EXPECT_FMT_EQ("%1111li", ST_Long); + EXPECT_FMT_EQ("%10li", ST_Long); + EXPECT_FMT_EQ("%*li", ST_Int, ST_Long); + EXPECT_FMT_EQ("%*l!", ST_Unknown); +} + +TEST(FormatReader, LongPrecision) { + EXPECT_FMT_EQ("%.1li", ST_Long); + EXPECT_FMT_EQ("%.11li", ST_Long); + EXPECT_FMT_EQ("%.1111li", ST_Long); + EXPECT_FMT_EQ("%.10li", ST_Long); + EXPECT_FMT_EQ("%.*li", ST_Int, ST_Long); + EXPECT_FMT_EQ("%.*l!", ST_Unknown); + + EXPECT_FMT_EQ("%1.1li", ST_Long); + EXPECT_FMT_EQ("%11.11li", ST_Long); + EXPECT_FMT_EQ("%111.1111li", ST_Long); + EXPECT_FMT_EQ("%110.10li", ST_Long); + EXPECT_FMT_EQ("%1.*li", ST_Int, ST_Long); + EXPECT_FMT_EQ("%1.*l!", ST_Unknown); + + EXPECT_FMT_EQ("%*.*li", ST_Int, ST_Int, ST_Long); + EXPECT_FMT_EQ("%*.*l!", ST_Unknown); +} + +TEST(FormatReader, IntSpecifiers) { + EXPECT_FMT_EQ("%hhi", ST_Int); + EXPECT_FMT_EQ("%hhd", ST_Int); + EXPECT_FMT_EQ("%hi", ST_Int); + EXPECT_FMT_EQ("%hd", ST_Int); + EXPECT_FMT_EQ("%i", ST_Int); + EXPECT_FMT_EQ("%d", ST_Int); + 
EXPECT_FMT_EQ("%li", ST_Long); + EXPECT_FMT_EQ("%ld", ST_Long); + EXPECT_FMT_EQ("%lli", ST_LongLong); + EXPECT_FMT_EQ("%lld", ST_LongLong); + EXPECT_FMT_EQ("%ji", ST_IntMax); + EXPECT_FMT_EQ("%jd", ST_IntMax); + EXPECT_FMT_EQ("%zi", ST_Size); + EXPECT_FMT_EQ("%zd", ST_Size); + EXPECT_FMT_EQ("%ti", ST_Ptrdiff); + EXPECT_FMT_EQ("%td", ST_Ptrdiff); + + EXPECT_FMT_EQ("%Li", ST_Unknown); + EXPECT_FMT_EQ("%Ld", ST_Unknown); +} + +TEST(FormatReader, UIntSpecifiers) { + EXPECT_FMT_EQ("%hhu", ST_Int); + EXPECT_FMT_EQ("%hho", ST_Int); + EXPECT_FMT_EQ("%hhx", ST_Int); + EXPECT_FMT_EQ("%hhX", ST_Int); + EXPECT_FMT_EQ("%hu", ST_Int); + EXPECT_FMT_EQ("%ho", ST_Int); + EXPECT_FMT_EQ("%hx", ST_Int); + EXPECT_FMT_EQ("%hX", ST_Int); + EXPECT_FMT_EQ("%u", ST_Int); + EXPECT_FMT_EQ("%o", ST_Int); + EXPECT_FMT_EQ("%x", ST_Int); + EXPECT_FMT_EQ("%X", ST_Int); + EXPECT_FMT_EQ("%lu", ST_Long); + EXPECT_FMT_EQ("%lo", ST_Long); + EXPECT_FMT_EQ("%lx", ST_Long); + EXPECT_FMT_EQ("%lX", ST_Long); + EXPECT_FMT_EQ("%llu", ST_LongLong); + EXPECT_FMT_EQ("%llo", ST_LongLong); + EXPECT_FMT_EQ("%llx", ST_LongLong); + EXPECT_FMT_EQ("%llX", ST_LongLong); + EXPECT_FMT_EQ("%ju", ST_IntMax); + EXPECT_FMT_EQ("%jo", ST_IntMax); + EXPECT_FMT_EQ("%jx", ST_IntMax); + EXPECT_FMT_EQ("%jX", ST_IntMax); + EXPECT_FMT_EQ("%zu", ST_Size); + EXPECT_FMT_EQ("%zo", ST_Size); + EXPECT_FMT_EQ("%zx", ST_Size); + EXPECT_FMT_EQ("%zX", ST_Size); + EXPECT_FMT_EQ("%tu", ST_Ptrdiff); + EXPECT_FMT_EQ("%to", ST_Ptrdiff); + EXPECT_FMT_EQ("%tx", ST_Ptrdiff); + EXPECT_FMT_EQ("%tX", ST_Ptrdiff); + + EXPECT_FMT_EQ("%Lu", ST_Unknown); + EXPECT_FMT_EQ("%Lo", ST_Unknown); + EXPECT_FMT_EQ("%Lx", ST_Unknown); + EXPECT_FMT_EQ("%LX", ST_Unknown); +} + +TEST(FormatReader, FloatSpecifiers) { + EXPECT_FMT_EQ("%a", ST_Double); + EXPECT_FMT_EQ("%e", ST_Double); + EXPECT_FMT_EQ("%f", ST_Double); + EXPECT_FMT_EQ("%g", ST_Double); + EXPECT_FMT_EQ("%la", ST_Double); + EXPECT_FMT_EQ("%le", ST_Double); + EXPECT_FMT_EQ("%lf", ST_Double); + 
EXPECT_FMT_EQ("%lg", ST_Double); + + EXPECT_FMT_EQ("%La", ST_LongDouble); + EXPECT_FMT_EQ("%Le", ST_LongDouble); + EXPECT_FMT_EQ("%Lf", ST_LongDouble); + EXPECT_FMT_EQ("%Lg", ST_LongDouble); + + EXPECT_FMT_EQ("%ha", ST_Unknown); + EXPECT_FMT_EQ("%he", ST_Unknown); + EXPECT_FMT_EQ("%hf", ST_Unknown); + EXPECT_FMT_EQ("%hg", ST_Unknown); + EXPECT_FMT_EQ("%hha", ST_Unknown); + EXPECT_FMT_EQ("%hhe", ST_Unknown); + EXPECT_FMT_EQ("%hhf", ST_Unknown); + EXPECT_FMT_EQ("%hhg", ST_Unknown); + EXPECT_FMT_EQ("%lla", ST_Unknown); + EXPECT_FMT_EQ("%lle", ST_Unknown); + EXPECT_FMT_EQ("%llf", ST_Unknown); + EXPECT_FMT_EQ("%llg", ST_Unknown); +} + +TEST(FormatReader, CharSpecifiers) { + EXPECT_FMT_EQ("%hhc", ST_Unknown); + EXPECT_FMT_EQ("%hc", ST_Unknown); + EXPECT_FMT_EQ("%c", ST_Int); + EXPECT_FMT_EQ("%lc", ST_WideChar); + EXPECT_FMT_EQ("%llc", ST_Unknown); + EXPECT_FMT_EQ("%jc", ST_Unknown); + EXPECT_FMT_EQ("%zc", ST_Unknown); + EXPECT_FMT_EQ("%tc", ST_Unknown); + EXPECT_FMT_EQ("%Lc", ST_Unknown); +} + +TEST(FormatReader, StringSpecifiers) { + EXPECT_FMT_EQ("%hhs", ST_Unknown); + EXPECT_FMT_EQ("%hs", ST_Unknown); + EXPECT_FMT_EQ("%s", ST_CString); + EXPECT_FMT_EQ("%ls", ST_WideCString); + EXPECT_FMT_EQ("%lls", ST_Unknown); + EXPECT_FMT_EQ("%js", ST_Unknown); + EXPECT_FMT_EQ("%zs", ST_Unknown); + EXPECT_FMT_EQ("%ts", ST_Unknown); + EXPECT_FMT_EQ("%Ls", ST_Unknown); +} + +TEST(FormatReader, VoidPointerSpecifiers) { + EXPECT_FMT_EQ("%hhp", ST_Unknown); + EXPECT_FMT_EQ("%hp", ST_Unknown); + EXPECT_FMT_EQ("%p", ST_VoidPointer); + EXPECT_FMT_EQ("%lp", ST_Unknown); + EXPECT_FMT_EQ("%llp", ST_Unknown); + EXPECT_FMT_EQ("%jp", ST_Unknown); + EXPECT_FMT_EQ("%zp", ST_Unknown); + EXPECT_FMT_EQ("%tp", ST_Unknown); + EXPECT_FMT_EQ("%Lp", ST_Unknown); +} + +TEST(FormatReader, CountSpecifiers) { + EXPECT_FMT_EQ("%hhn", ST_Count_Char); + EXPECT_FMT_EQ("%hn", ST_Count_Short); + EXPECT_FMT_EQ("%n", ST_Count_Int); + EXPECT_FMT_EQ("%ln", ST_Count_Long); + EXPECT_FMT_EQ("%lln", ST_Count_LongLong); + 
EXPECT_FMT_EQ("%jn", ST_Count_IntMax); + EXPECT_FMT_EQ("%zn", ST_Count_Size); + EXPECT_FMT_EQ("%tn", ST_Count_Ptrdiff); + EXPECT_FMT_EQ("%Ln", ST_Unknown); +} diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp index 8be32d2effa6e..56082f374ae80 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp +++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp @@ -61,10 +61,11 @@ void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const { const char *ToFmt = "Node%p:d%d:s"; if (E->getFromMO()->isDef() && !E->getToMO()->isDef()) std::swap(FromFmt, ToFmt); - auto From = format(FromFmt, E->getFromMI(), E->getFromMO()->getIdx()); - auto To = format(ToFmt, E->getToMI(), E->getToMO()->getIdx()); - if (E->getFromMO()->isDef() && !E->getToMO()->isDef()) - std::swap(From, To); + auto FromF = format(FromFmt, E->getFromMI(), E->getFromMO()->getIdx()); + auto ToF = format(ToFmt, E->getToMI(), E->getToMO()->getIdx()); + bool Swap = E->getFromMO()->isDef() && !E->getToMO()->isDef(); + auto &From = Swap ? ToF : FromF; + auto &To = Swap ? FromF : ToF; OS << " " << From << " -> " << To << " [label=\"$" << E->getName(); if (E->getFromMO()->isDef() == E->getToMO()->isDef()) From 51c1632f8d500a8439707153439650945c446dfa Mon Sep 17 00:00:00 2001 From: yijiagu Date: Wed, 2 Nov 2022 13:51:48 -0700 Subject: [PATCH 084/516] [mlir] Remove eliminateBlockingAwaitOps option in AsyncToAsyncRuntime pass Remove the eliminateBlockingAwaitOps option in AsyncToAsyncRuntime pass Today the AsyncToAsyncRuntime pass does two things: one is converting normal funcs with async ops to coroutine cfg; the other is lowering high level async operations to async.coro and async.runtime operations. This patch removes the converting step from AsyncToAsyncRuntime pass. In the next step we will create a new asyncfication pass for converting normal funcs to the newly added async.func operation. 
Reviewed By: ezhulenev Differential Revision: https://reviews.llvm.org/D137282 --- .../mlir/Dialect/Async/IR/AsyncDialect.td | 5 +- mlir/include/mlir/Dialect/Async/Passes.td | 7 - .../Async/Transforms/AsyncToAsyncRuntime.cpp | 143 -------- ...c-to-async-runtime-eliminate-blocking.mlir | 324 ------------------ 4 files changed, 2 insertions(+), 477 deletions(-) delete mode 100644 mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td b/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td index e3b4db42204b8..eb1d76a180fe2 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td @@ -32,9 +32,8 @@ def AsyncDialect : Dialect { let extraClassDeclaration = [{ /// The name of a unit attribute on funcs that are allowed to have a - /// blocking async.runtime.await ops. Only useful in combination with - /// 'eliminate-blocking-await-ops' option, which in absence of this - /// attribute might convert a func to a coroutine. + /// blocking async.runtime.await ops. In absence of this attribute the + /// asyncification pass might convert a func to a coroutine. static constexpr StringRef kAllowedToBlockAttrName = "async.allowed_to_block"; }]; diff --git a/mlir/include/mlir/Dialect/Async/Passes.td b/mlir/include/mlir/Dialect/Async/Passes.td index 16fb8626c0c0e..aed5b4ff7865a 100644 --- a/mlir/include/mlir/Dialect/Async/Passes.td +++ b/mlir/include/mlir/Dialect/Async/Passes.td @@ -44,13 +44,6 @@ def AsyncToAsyncRuntime : Pass<"async-to-async-runtime", "ModuleOp"> { let summary = "Lower high level async operations (e.g. async.execute) to the" "explicit async.runtime and async.coro operations"; let constructor = "mlir::createAsyncToAsyncRuntimePass()"; - let options = [ - // Temporary for bringup, should become the default. 
- Option<"eliminateBlockingAwaitOps", "eliminate-blocking-await-ops", "bool", - /*default=*/"false", - "Rewrite functions with blocking async.runtime.await as coroutines " - "with async.runtime.await_and_resume.">, - ]; let dependentDialects = ["async::AsyncDialect", "func::FuncDialect"]; } diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp index b4880c0e3b3f5..38f3717c70f9b 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp @@ -610,137 +610,6 @@ class AssertOpLowering : public OpConversionPattern { llvm::DenseMap &outlinedFunctions; }; -//===----------------------------------------------------------------------===// - -/// Rewrite a func as a coroutine by: -/// 1) Wrapping the results into `async.value`. -/// 2) Prepending the results with `async.token`. -/// 3) Setting up coroutine blocks. -/// 4) Rewriting return ops as yield op and branch op into the suspend block. -static CoroMachinery rewriteFuncAsCoroutine(func::FuncOp func) { - auto *ctx = func->getContext(); - auto loc = func.getLoc(); - SmallVector resultTypes; - resultTypes.reserve(func.getCallableResults().size()); - llvm::transform(func.getCallableResults(), std::back_inserter(resultTypes), - [](Type type) { return ValueType::get(type); }); - func.setType( - FunctionType::get(ctx, func.getFunctionType().getInputs(), resultTypes)); - func.insertResult(0, TokenType::get(ctx), {}); - for (Block &block : func.getBlocks()) { - Operation *terminator = block.getTerminator(); - if (auto returnOp = dyn_cast(*terminator)) { - ImplicitLocOpBuilder builder(loc, returnOp); - builder.create(returnOp.getOperands()); - returnOp.erase(); - } - } - return setupCoroMachinery(func); -} - -/// Rewrites a call into a function that has been rewritten as a coroutine. 
-/// -/// The invocation of this function is safe only when call ops are traversed in -/// reverse order of how they appear in a single block. See `funcsToCoroutines`. -static void rewriteCallsiteForCoroutine(func::CallOp oldCall, - func::FuncOp func) { - auto loc = func.getLoc(); - ImplicitLocOpBuilder callBuilder(loc, oldCall); - auto newCall = callBuilder.create( - func.getName(), func.getCallableResults(), oldCall.getArgOperands()); - - // Await on the async token and all the value results and unwrap the latter. - callBuilder.create(loc, newCall.getResults().front()); - SmallVector unwrappedResults; - unwrappedResults.reserve(newCall->getResults().size() - 1); - for (Value result : newCall.getResults().drop_front()) - unwrappedResults.push_back( - callBuilder.create(loc, result).getResult()); - // Careful, when result of a call is piped into another call this could lead - // to a dangling pointer. - oldCall.replaceAllUsesWith(unwrappedResults); - oldCall.erase(); -} - -static bool isAllowedToBlock(func::FuncOp func) { - return !!func->getAttrOfType(AsyncDialect::kAllowedToBlockAttrName); -} - -static LogicalResult funcsToCoroutines( - ModuleOp module, - llvm::DenseMap &outlinedFunctions) { - // The following code supports the general case when 2 functions mutually - // recurse into each other. Because of this and that we are relying on - // SymbolUserMap to find pointers to calling FuncOps, we cannot simply erase - // a FuncOp while inserting an equivalent coroutine, because that could lead - // to dangling pointers. - - SmallVector funcWorklist; - - // Careful, it's okay to add a func to the worklist multiple times if and only - // if the loop processing the worklist will skip the functions that have - // already been converted to coroutines. - auto addToWorklist = [&](func::FuncOp func) { - if (isAllowedToBlock(func)) - return; - // N.B. To refactor this code into a separate pass the lookup in - // outlinedFunctions is the most obvious obstacle. 
Looking at an arbitrary - // func and recognizing if it has a coroutine structure is messy. Passing - // this dict between the passes is ugly. - if (isAllowedToBlock(func) || - outlinedFunctions.find(func) == outlinedFunctions.end()) { - for (Operation &op : func.getBody().getOps()) { - if (isa(op)) { - funcWorklist.push_back(func); - break; - } - } - } - }; - - // Traverse in post-order collecting for each func op the await ops it has. - for (func::FuncOp func : module.getOps()) - addToWorklist(func); - - SymbolTableCollection symbolTable; - SymbolUserMap symbolUserMap(symbolTable, module); - - // Rewrite funcs, while updating call sites and adding them to the worklist. - while (!funcWorklist.empty()) { - auto func = funcWorklist.pop_back_val(); - auto insertion = outlinedFunctions.insert({func, CoroMachinery{}}); - if (!insertion.second) - // This function has already been processed because this is either - // the corecursive case, or a caller with multiple calls to a newly - // created corouting. Either way, skip updating the call sites. - continue; - insertion.first->second = rewriteFuncAsCoroutine(func); - SmallVector users(symbolUserMap.getUsers(func).begin(), - symbolUserMap.getUsers(func).end()); - // If there are multiple calls from the same block they need to be traversed - // in reverse order so that symbolUserMap references are not invalidated - // when updating the users of the call op which is earlier in the block. - llvm::sort(users, [](Operation *a, Operation *b) { - Block *blockA = a->getBlock(); - Block *blockB = b->getBlock(); - // Impose arbitrary order on blocks so that there is a well-defined order. - return blockA > blockB || (blockA == blockB && !a->isBeforeInBlock(b)); - }); - // Rewrite the callsites to await on results of the newly created coroutine. 
- for (Operation *op : users) { - if (func::CallOp call = dyn_cast(*op)) { - func::FuncOp caller = call->getParentOfType(); - rewriteCallsiteForCoroutine(call, func); // Careful, erases the call op. - addToWorklist(caller); - } else { - op->emitError("Unexpected reference to func referenced by symbol"); - return failure(); - } - } - } - return success(); -} - //===----------------------------------------------------------------------===// void AsyncToAsyncRuntimePass::runOnOperation() { ModuleOp module = getOperation(); @@ -764,12 +633,6 @@ void AsyncToAsyncRuntimePass::runOnOperation() { return outlinedFunctions.find(parentFunc) != outlinedFunctions.end(); }; - if (eliminateBlockingAwaitOps && - failed(funcsToCoroutines(module, outlinedFunctions))) { - signalPassFailure(); - return; - } - // Lower async operations to async.runtime operations. MLIRContext *ctx = module->getContext(); RewritePatternSet asyncPatterns(ctx); @@ -815,12 +678,6 @@ void AsyncToAsyncRuntimePass::runOnOperation() { return outlinedFunctions.find(func) == outlinedFunctions.end(); }); - if (eliminateBlockingAwaitOps) - runtimeTarget.addDynamicallyLegalOp( - [&](RuntimeAwaitOp op) -> bool { - return isAllowedToBlock(op->getParentOfType()); - }); - if (failed(applyPartialConversion(module, runtimeTarget, std::move(asyncPatterns)))) { signalPassFailure(); diff --git a/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir b/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir deleted file mode 100644 index 5a85c3d8974e6..0000000000000 --- a/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir +++ /dev/null @@ -1,324 +0,0 @@ -// RUN: mlir-opt %s -split-input-file \ -// RUN: -async-to-async-runtime="eliminate-blocking-await-ops=true" \ -// RUN: | FileCheck %s --dump-input=always - -// CHECK-LABEL: func @simple_callee -// CHECK-SAME: (%[[ARG:.*]]: f32) -// CHECK-SAME: -> (!async.token, !async.value {builtin.foo = "bar"}) -func.func 
@simple_callee(%arg0: f32) -> (f32 {builtin.foo = "bar"}) { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]] -// CHECK ^[[ORIGINAL_ENTRY]]: -// CHECK: %[[VAL:.*]] = arith.addf %[[ARG]], %[[ARG]] : f32 - %0 = arith.addf %arg0, %arg0 : f32 -// CHECK: %[[VAL_STORAGE:.*]] = async.runtime.create : !async.value - %1 = async.runtime.create: !async.value -// CHECK: async.runtime.store %[[VAL]], %[[VAL_STORAGE]] : - async.runtime.store %0, %1: !async.value -// CHECK: async.runtime.set_available %[[VAL_STORAGE]] : !async.value - async.runtime.set_available %1: !async.value - -// CHECK: %[[SAVED:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[VAL_STORAGE]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]] - %2 = async.await %1 : !async.value - -// CHECK: ^[[RESUME]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[VAL_STORAGE]] : !async.value -// CHECK: cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[LOADED:.*]] = async.runtime.load %[[VAL_STORAGE]] : -// CHECK: %[[RETURNED:.*]] = arith.mulf %[[ARG]], %[[LOADED]] : f32 -// CHECK: async.runtime.store %[[RETURNED]], %[[RETURNED_STORAGE]] : -// CHECK: async.runtime.set_available %[[RETURNED_STORAGE]] -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - %3 = arith.mulf %arg0, %2 : f32 - return %3: f32 - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: async.runtime.set_error %[[RETURNED_STORAGE]] -// CHECK: cf.br ^[[CLEANUP]] - - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] 
-// CHECK: return %[[TOKEN]], %[[RETURNED_STORAGE]] : !async.token, !async.value -} - -// CHECK-LABEL: func @simple_caller() -// CHECK-SAME: -> (!async.token, !async.value) -func.func @simple_caller() -> f32 { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]] -// CHECK ^[[ORIGINAL_ENTRY]]: - -// CHECK: %[[CONSTANT:.*]] = arith.constant - %c = arith.constant 1.0 : f32 -// CHECK: %[[RETURNED_TO_CALLER:.*]]:2 = call @simple_callee(%[[CONSTANT]]) : (f32) -> (!async.token, !async.value) -// CHECK: %[[SAVED:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]]#0, %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]] - %r = call @simple_callee(%c): (f32) -> f32 - -// CHECK: ^[[RESUME]]: -// CHECK: %[[IS_TOKEN_ERROR:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER]]#0 : !async.token -// CHECK: cf.cond_br %[[IS_TOKEN_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK:.*]] - -// CHECK: ^[[BRANCH_TOKEN_OK]]: -// CHECK: %[[IS_VALUE_ERROR:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER]]#1 : !async.value -// CHECK: cf.cond_br %[[IS_VALUE_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK:.*]] - -// CHECK: ^[[BRANCH_VALUE_OK]]: -// CHECK: %[[LOADED:.*]] = async.runtime.load %[[RETURNED_TO_CALLER]]#1 : -// CHECK: async.runtime.store %[[LOADED]], %[[RETURNED_STORAGE]] : -// CHECK: async.runtime.set_available %[[RETURNED_STORAGE]] -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - return %r: f32 -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: async.runtime.set_error %[[RETURNED_STORAGE]] -// CHECK: cf.br ^[[CLEANUP]] - - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// 
CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]], %[[RETURNED_STORAGE]] : !async.token, !async.value -} - -// CHECK-LABEL: func @double_caller() -// CHECK-SAME: -> (!async.token, !async.value) -func.func @double_caller() -> f32 { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]] -// CHECK ^[[ORIGINAL_ENTRY]]: - -// CHECK: %[[CONSTANT:.*]] = arith.constant - %c = arith.constant 1.0 : f32 -// CHECK: %[[RETURNED_TO_CALLER_1:.*]]:2 = call @simple_callee(%[[CONSTANT]]) : (f32) -> (!async.token, !async.value) -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER_1]]#0, %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - %r = call @simple_callee(%c): (f32) -> f32 - -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_TOKEN_ERROR_1:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_1]]#0 : !async.token -// CHECK: cf.cond_br %[[IS_TOKEN_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_1:.*]] - -// CHECK: ^[[BRANCH_TOKEN_OK_1]]: -// CHECK: %[[IS_VALUE_ERROR_1:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_1]]#1 : !async.value -// CHECK: cf.cond_br %[[IS_VALUE_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_1:.*]] - -// CHECK: ^[[BRANCH_VALUE_OK_1]]: -// CHECK: %[[LOADED_1:.*]] = async.runtime.load %[[RETURNED_TO_CALLER_1]]#1 : -// CHECK: %[[RETURNED_TO_CALLER_2:.*]]:2 = call @simple_callee(%[[LOADED_1]]) : (f32) -> (!async.token, !async.value) -// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER_2]]#0, %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], 
^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - %s = call @simple_callee(%r): (f32) -> f32 - -// CHECK: ^[[RESUME_2]]: -// CHECK: %[[IS_TOKEN_ERROR_2:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_2]]#0 : !async.token -// CHECK: cf.cond_br %[[IS_TOKEN_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_2:.*]] - -// CHECK: ^[[BRANCH_TOKEN_OK_2]]: -// CHECK: %[[IS_VALUE_ERROR_2:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_2]]#1 : !async.value -// CHECK: cf.cond_br %[[IS_VALUE_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_2:.*]] - -// CHECK: ^[[BRANCH_VALUE_OK_2]]: -// CHECK: %[[LOADED_2:.*]] = async.runtime.load %[[RETURNED_TO_CALLER_2]]#1 : -// CHECK: async.runtime.store %[[LOADED_2]], %[[RETURNED_STORAGE]] : -// CHECK: async.runtime.set_available %[[RETURNED_STORAGE]] -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - return %s: f32 -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: async.runtime.set_error %[[RETURNED_STORAGE]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]], %[[RETURNED_STORAGE]] : !async.token, !async.value -} - -// CHECK-LABEL: func @recursive -// CHECK-SAME: (%[[ARG:.*]]: !async.token) -> !async.token -func.func @recursive(%arg: !async.token) { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[ARG]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - - async.await %arg : !async.token -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token -// CHECK: cf.cond_br %[[IS_ERROR]], 
^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[GIVEN:.*]] = async.runtime.create : !async.token -%r = async.runtime.create : !async.token -// CHECK: async.runtime.set_available %[[GIVEN]] -async.runtime.set_available %r: !async.token -// CHECK: %[[RETURNED_TO_CALLER:.*]] = call @recursive(%[[GIVEN]]) : (!async.token) -> !async.token -call @recursive(%r): (!async.token) -> () -// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - -// CHECK: ^[[RESUME_2]]: -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] -return - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]] : !async.token -} - -// CHECK-LABEL: func @corecursive1 -// CHECK-SAME: (%[[ARG:.*]]: !async.token) -> !async.token -func.func @corecursive1(%arg: !async.token) { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[ARG]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - - async.await %arg : !async.token -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token -// CHECK: cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[GIVEN:.*]] = async.runtime.create : !async.token -%r = async.runtime.create : !async.token -// CHECK: async.runtime.set_available 
%[[GIVEN]] -async.runtime.set_available %r: !async.token -// CHECK: %[[RETURNED_TO_CALLER:.*]] = call @corecursive2(%[[GIVEN]]) : (!async.token) -> !async.token -call @corecursive2(%r): (!async.token) -> () -// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - -// CHECK: ^[[RESUME_2]]: -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] -return - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]] : !async.token -} - -// CHECK-LABEL: func @corecursive2 -// CHECK-SAME: (%[[ARG:.*]]: !async.token) -> !async.token -func.func @corecursive2(%arg: !async.token) { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[ARG]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - - async.await %arg : !async.token -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token -// CHECK: cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[GIVEN:.*]] = async.runtime.create : !async.token -%r = async.runtime.create : !async.token -// CHECK: async.runtime.set_available %[[GIVEN]] -async.runtime.set_available %r: !async.token -// CHECK: %[[RETURNED_TO_CALLER:.*]] = call @corecursive1(%[[GIVEN]]) : (!async.token) -> !async.token -call @corecursive1(%r): (!async.token) -> () 
-// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - -// CHECK: ^[[RESUME_2]]: -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] -return - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]] : !async.token -} - -// CHECK-LABEL: func @caller_allowed_to_block -// CHECK-SAME: () -> f32 -func.func @caller_allowed_to_block() -> f32 attributes { async.allowed_to_block } { -// CHECK: %[[CONSTANT:.*]] = arith.constant - %c = arith.constant 1.0 : f32 -// CHECK: %[[RETURNED_TO_CALLER:.*]]:2 = call @simple_callee(%[[CONSTANT]]) : (f32) -> (!async.token, !async.value) -// CHECK: async.runtime.await %[[RETURNED_TO_CALLER]]#0 -// CHECK: async.runtime.await %[[RETURNED_TO_CALLER]]#1 -// CHECK: %[[RETURNED:.*]] = async.runtime.load %[[RETURNED_TO_CALLER]]#1 - %r = call @simple_callee(%c): (f32) -> f32 - -// CHECK: return %[[RETURNED]] : f32 - return %r: f32 -} From f6a6f35d1b3fd8df48b4d6687a9933943b4c9df8 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 2 Nov 2022 21:01:56 +0000 Subject: [PATCH 085/516] [gn build] Port cf239c2f1777 --- llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index b5a393afda93a..397b309d524bb 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -84,6 +84,7 @@ static_library("Support") { 
"FileOutputBuffer.cpp", "FileUtilities.cpp", "FoldingSet.cpp", + "Format.cpp", "FormatVariadic.cpp", "FormattedStream.cpp", "GlobPattern.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index 4766c00f8c85d..619020ca8c43d 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -44,6 +44,7 @@ unittest("SupportTests") { "FileCollectorTest.cpp", "FileOutputBufferTest.cpp", "FileUtilitiesTest.cpp", + "FormatChkTest.cpp", "FormatVariadicTest.cpp", "GlobPatternTest.cpp", "HashBuilderTest.cpp", From c7576cb89d6c95f03968076e902d3adfd1996577 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 2 Nov 2022 19:43:53 +0100 Subject: [PATCH 086/516] [libc++][test] Fixes transitive includes. These were accidentally set to generating in 243da90ea5357c1ca324f714ea4813dc9029af27 Reviewed By: #libc, philnik Differential Revision: https://reviews.llvm.org/D137278 --- libcxx/test/libcxx/transitive_includes.sh.cpp | 5 +++-- libcxx/test/libcxx/transitive_includes/cxx03.csv | 1 + libcxx/test/libcxx/transitive_includes/cxx11.csv | 1 + libcxx/test/libcxx/transitive_includes/cxx14.csv | 1 + libcxx/test/libcxx/transitive_includes/cxx17.csv | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libcxx/test/libcxx/transitive_includes.sh.cpp b/libcxx/test/libcxx/transitive_includes.sh.cpp index edb696fb8da54..04c5debbea855 100644 --- a/libcxx/test/libcxx/transitive_includes.sh.cpp +++ b/libcxx/test/libcxx/transitive_includes.sh.cpp @@ -52,7 +52,7 @@ import re # the file and run this test. 
# Note that this needs to be done for all supported language versions of libc++: # for std in c++03 c++11 c++14 c++17 c++20 c++2b; do /bin/llvm-lit --param std=$std ${path_to_this_file}; done -regenerate_expected_results = True +regenerate_expected_results = False # Used because the sequence of tokens RUN : can't appear anywhere or it'll confuse Lit. RUN = "RUN" @@ -560,5 +560,6 @@ END-SCRIPT #if defined(TEST_140) #include #endif -// RUN: %{python} %S/transitive_includes_to_csv.py %t > %S/transitive_includes/%{cxx_std}.csv +// RUN: %{python} %S/transitive_includes_to_csv.py %t > %t/transitive_includes.csv +// RUN: diff -w %S/transitive_includes/%{cxx_std}.csv %t/transitive_includes.csv // GENERATED-MARKER diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 1bdb9cb7e7f71..a7250cf841dba 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono cstdint chrono ctime chrono limits chrono ratio diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index b8b01397da9a0..97dff0bf62d4d 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono cstdint chrono ctime chrono limits chrono ratio diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index 0a9ece5a121a9..e0935e85f1b7f 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono cstdint chrono ctime chrono limits chrono ratio diff --git 
a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index 0a9ece5a121a9..e0935e85f1b7f 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono cstdint chrono ctime chrono limits chrono ratio From a588cfe37ea36ec4fae35a233a13d3557fba86e9 Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Wed, 2 Nov 2022 20:58:36 +0000 Subject: [PATCH 087/516] [Sanitizers] Modified __aarch64__ to use the 64 bit version of the allocator. This change will switch SizeClassAllocator32 to SizeClassAllocator64 on ARM. This might potentially affect ARM platforms with 39-bit address space. This addresses [[ https://github.com/google/sanitizers/issues/703 | issues/703 ]], but unlike [[ https://reviews.llvm.org/D60243 | D60243 ]] it defaults to 64 bit allocator. Reviewed By: vitalybuka, MaskRay Differential Revision: https://reviews.llvm.org/D137136 --- compiler-rt/lib/sanitizer_common/sanitizer_platform.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 32005eef08cd5..7ecc465bea97a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -286,8 +286,8 @@ #ifndef SANITIZER_CAN_USE_ALLOCATOR64 # if (SANITIZER_ANDROID && defined(__aarch64__)) || SANITIZER_FUCHSIA # define SANITIZER_CAN_USE_ALLOCATOR64 1 -# elif defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ - defined(__arm__) || SANITIZER_RISCV64 || defined(__hexagon__) +# elif defined(__mips64) || defined(__arm__) || defined(__i386__) || \ + SANITIZER_RISCV64 || defined(__hexagon__) # define SANITIZER_CAN_USE_ALLOCATOR64 0 # else # define SANITIZER_CAN_USE_ALLOCATOR64 (SANITIZER_WORDSIZE == 64) From 
ea64e66f7b71dfd52e9701a080b8216052344962 Mon Sep 17 00:00:00 2001 From: Jennifer Yu Date: Tue, 1 Nov 2022 14:46:12 -0700 Subject: [PATCH 088/516] [OPENMP]Initial support for error directive. Differential Revision: https://reviews.llvm.org/D137209 --- clang/include/clang-c/Index.h | 6 +- clang/include/clang/AST/RecursiveASTVisitor.h | 4 + clang/include/clang/AST/StmtOpenMP.h | 45 +++++++ clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/Sema.h | 4 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 15 +++ clang/lib/AST/StmtPrinter.cpp | 5 + clang/lib/AST/StmtProfile.cpp | 3 + clang/lib/Basic/OpenMPKinds.cpp | 1 + clang/lib/CodeGen/CGStmt.cpp | 3 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 5 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Parse/ParseOpenMP.cpp | 6 +- clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 20 +++ clang/lib/Sema/TreeTransform.h | 11 ++ clang/lib/Serialization/ASTReaderStmt.cpp | 12 ++ clang/lib/Serialization/ASTWriterStmt.cpp | 7 ++ clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 + clang/test/OpenMP/error_ast_print.cpp | 62 ++++++++++ clang/test/OpenMP/error_message.cpp | 114 ++++++++++++++++++ clang/tools/libclang/CIndex.cpp | 7 ++ clang/tools/libclang/CXCursor.cpp | 3 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 25 files changed, 336 insertions(+), 3 deletions(-) create mode 100644 clang/test/OpenMP/error_ast_print.cpp create mode 100644 clang/test/OpenMP/error_message.cpp diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 2cd2d499ab53d..e0f6f1c73549f 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -1978,7 +1978,11 @@ enum CXCursorKind { */ CXCursor_OMPParallelMaskedTaskLoopSimdDirective = 304, - CXCursor_LastStmt = CXCursor_OMPParallelMaskedTaskLoopSimdDirective, + /** OpenMP error directive. 
+ */ + CXCursor_OMPErrorDirective = 305, + + CXCursor_LastStmt = CXCursor_OMPErrorDirective, /** * Cursor that represents the translation unit itself. diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 5eb8e0353ffd8..3d10d3bf98b29 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3153,6 +3153,10 @@ DEF_TRAVERSE_STMT(OMPParallelGenericLoopDirective, DEF_TRAVERSE_STMT(OMPTargetParallelGenericLoopDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) + +DEF_TRAVERSE_STMT(OMPErrorDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + // OpenMP clauses. template bool RecursiveASTVisitor::TraverseOMPClause(OMPClause *C) { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 702a82537ab2d..baa5e0ed7b63f 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -6220,6 +6220,51 @@ class OMPTargetParallelGenericLoopDirective final : public OMPLoopDirective { return T->getStmtClass() == OMPTargetParallelGenericLoopDirectiveClass; } }; + +/// This represents '#pragma omp error' directive. +/// +/// \code +/// #pragma omp error +/// \endcode +class OMPErrorDirective final : public OMPExecutableDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// + OMPErrorDirective(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPExecutableDirective(OMPErrorDirectiveClass, llvm::omp::OMPD_error, + StartLoc, EndLoc) {} + /// Build an empty directive. + /// + explicit OMPErrorDirective() + : OMPExecutableDirective(OMPErrorDirectiveClass, llvm::omp::OMPD_error, + SourceLocation(), SourceLocation()) {} + +public: + /// + /// \param C AST context. 
+ /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param Clauses List of clauses. + /// + static OMPErrorDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses); + + /// Creates an empty directive. + /// + /// \param C AST context. + /// + static OMPErrorDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPErrorDirectiveClass; + } +}; } // end namespace clang #endif diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index ebbd8db313428..c434b07c95a40 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -292,3 +292,4 @@ def OMPTeamsGenericLoopDirective : StmtNode; def OMPTargetTeamsGenericLoopDirective : StmtNode; def OMPParallelGenericLoopDirective : StmtNode; def OMPTargetParallelGenericLoopDirective : StmtNode; +def OMPErrorDirective : StmtNode; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 46c139d6c62b0..e8c9cb966bae7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11286,6 +11286,10 @@ class Sema final { /// Called on well-formed '\#pragma omp taskyield'. StmtResult ActOnOpenMPTaskyieldDirective(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp error'. + StmtResult ActOnOpenMPErrorDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp barrier'. 
StmtResult ActOnOpenMPBarrierDirective(SourceLocation StartLoc, SourceLocation EndLoc); diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 21c6cdcf592fa..ceaade4a6e1e8 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1932,6 +1932,7 @@ enum StmtCode { STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE, STMT_OMP_TASK_DIRECTIVE, STMT_OMP_TASKYIELD_DIRECTIVE, + STMT_OMP_ERROR_DIRECTIVE, STMT_OMP_BARRIER_DIRECTIVE, STMT_OMP_TASKWAIT_DIRECTIVE, STMT_OMP_FLUSH_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index e0a4221db7ecf..88bb517e2280b 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -744,6 +744,21 @@ OMPTaskyieldDirective *OMPTaskyieldDirective::CreateEmpty(const ASTContext &C, return new (C) OMPTaskyieldDirective(); } +OMPErrorDirective *OMPErrorDirective::Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses) { + return createDirective( + C, Clauses, /*AssociatedStmt=*/nullptr, /*NumChildren=*/0, StartLoc, + EndLoc); +} + +OMPErrorDirective *OMPErrorDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + EmptyShell) { + return createEmptyDirective(C, NumClauses); +} + OMPBarrierDirective *OMPBarrierDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc) { diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index a29f762e10c14..70bed0eb60df0 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -843,6 +843,11 @@ void StmtPrinter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPErrorDirective(OMPErrorDirective *Node) { + Indent() << "#pragma omp error"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *Node) { 
Indent() << "#pragma omp taskgroup"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 6af2beefc7926..01d44a95bbd4b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1014,6 +1014,9 @@ void StmtProfiler::VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *S) { VisitOMPExecutableDirective(S); } +void StmtProfiler::VisitOMPErrorDirective(const OMPErrorDirective *S) { + VisitOMPExecutableDirective(S); +} void StmtProfiler::VisitOMPTaskgroupDirective(const OMPTaskgroupDirective *S) { VisitOMPExecutableDirective(S); if (const Expr *E = S->getReductionRef()) diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 2f2e6537ebd3e..7d74ec8701569 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -763,6 +763,7 @@ void clang::getOpenMPCaptureRegions( case OMPD_allocate: case OMPD_taskyield: case OMPD_barrier: + case OMPD_error: case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_cancel: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 30c955b3d43fd..9531b855780c7 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -254,6 +254,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPTaskyieldDirectiveClass: EmitOMPTaskyieldDirective(cast(*S)); break; + case Stmt::OMPErrorDirectiveClass: + EmitOMPErrorDirective(cast(*S)); + break; case Stmt::OMPBarrierDirectiveClass: EmitOMPBarrierDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 40d84d754f9d3..4619be474b463 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1347,6 +1347,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( case OMPD_parallel_for_simd: case OMPD_task: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_taskgroup: 
@@ -5244,6 +5245,10 @@ void CodeGenFunction::EmitOMPTaskyieldDirective( CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); } +void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { + llvm_unreachable("CodeGen for 'omp error' is not supported yet."); +} + void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8921cb78de996..560a9d97381c8 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3520,6 +3520,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPParallelMasterDirective(const OMPParallelMasterDirective &S); void EmitOMPTaskDirective(const OMPTaskDirective &S); void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S); + void EmitOMPErrorDirective(const OMPErrorDirective &S); void EmitOMPBarrierDirective(const OMPBarrierDirective &S); void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S); void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 6f8a467b9a657..7f48b16f97d61 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2310,6 +2310,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_unroll: case OMPD_task: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_taskgroup: @@ -2410,8 +2411,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( /// annot_pragma_openmp 'parallel' | 'simd' | 'for' | 'sections' | /// 'section' | 'single' | 'master' | 'critical' [ '(' ')' ] | /// 'parallel for' | 'parallel sections' | 'parallel master' | 'task' | -/// 'taskyield' | 'barrier' | 'taskwait' | 'flush' | 'ordered' | -/// 'atomic' | 'for simd' | 'parallel 
for simd' | 'target +/// 'taskyield' | 'barrier' | 'taskwait' | 'flush' | 'ordered' | 'error' +/// | 'atomic' | 'for simd' | 'parallel for simd' | 'target' | 'target /// data' | 'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' | /// 'master taskloop' | 'master taskloop simd' | 'parallel master /// taskloop' | 'parallel master taskloop simd' | 'distribute' | 'target @@ -2697,6 +2698,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( case OMPD_depobj: case OMPD_scan: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index d8344cfd01f95..75ec180552f4f 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1494,6 +1494,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPTaskLoopSimdDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: case Stmt::OMPTaskyieldDirectiveClass: + case Stmt::OMPErrorDirectiveClass: case Stmt::OMPTeamsDirectiveClass: case Stmt::OMPTeamsDistributeDirectiveClass: case Stmt::OMPTeamsDistributeParallelForDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index fd15c4d39b3b1..9906f636201c2 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4532,6 +4532,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -6305,6 +6306,11 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( "No associated statement allowed for 'omp taskyield' directive"); Res = ActOnOpenMPTaskyieldDirective(StartLoc, EndLoc); break; + case OMPD_error: + assert(AStmt == nullptr && + "No associated statement allowed for 'omp error' directive"); + Res = 
ActOnOpenMPErrorDirective(ClausesWithImplicit, StartLoc, EndLoc); + break; case OMPD_barrier: assert(ClausesWithImplicit.empty() && "No clauses are allowed for 'omp barrier' directive"); @@ -11020,6 +11026,12 @@ StmtResult Sema::ActOnOpenMPBarrierDirective(SourceLocation StartLoc, return OMPBarrierDirective::Create(Context, StartLoc, EndLoc); } +StmtResult Sema::ActOnOpenMPErrorDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc) { + return OMPErrorDirective::Create(Context, StartLoc, EndLoc, Clauses); +} + StmtResult Sema::ActOnOpenMPTaskwaitDirective(ArrayRef Clauses, SourceLocation StartLoc, SourceLocation EndLoc) { @@ -15313,6 +15325,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15401,6 +15414,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15497,6 +15511,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15588,6 +15603,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15676,6 +15692,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15767,6 +15784,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + 
case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15861,6 +15879,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15952,6 +15971,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index f0d3a5ca089a3..ab34a9d611b9c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -8897,6 +8897,17 @@ TreeTransform::TransformOMPTaskwaitDirective(OMPTaskwaitDirective *D) { return Res; } +template +StmtResult +TreeTransform::TransformOMPErrorDirective(OMPErrorDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(OMPD_error, DirName, nullptr, + D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPTaskgroupDirective( OMPTaskgroupDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index f6a2f85682b2a..2a3c6e7231785 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2412,6 +2412,13 @@ void ASTStmtReader::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) { VisitOMPExecutableDirective(D); } +void ASTStmtReader::VisitOMPErrorDirective(OMPErrorDirective *D) { + VisitStmt(D); + // The NumClauses field was read in ReadStmtFromStream. 
+ Record.skipInts(1); + VisitOMPExecutableDirective(D); +} + void ASTStmtReader::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) { VisitStmt(D); VisitOMPExecutableDirective(D); @@ -3359,6 +3366,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { Context, Record[ASTStmtReader::NumStmtFields], Empty); break; + case STMT_OMP_ERROR_DIRECTIVE: + S = OMPErrorDirective::CreateEmpty( + Context, Record[ASTStmtReader::NumStmtFields], Empty); + break; + case STMT_OMP_TASKGROUP_DIRECTIVE: S = OMPTaskgroupDirective::CreateEmpty( Context, Record[ASTStmtReader::NumStmtFields], Empty); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index be28ad6b2a668..e2ba69ca1eec8 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2396,6 +2396,13 @@ void ASTStmtWriter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) { Code = serialization::STMT_OMP_TASKWAIT_DIRECTIVE; } +void ASTStmtWriter::VisitOMPErrorDirective(OMPErrorDirective *D) { + VisitStmt(D); + Record.push_back(D->getNumClauses()); + VisitOMPExecutableDirective(D); + Code = serialization::STMT_OMP_ERROR_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) { VisitStmt(D); VisitOMPExecutableDirective(D); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 5372c745f231f..1a2578b85f08f 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1744,6 +1744,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTaskyieldDirectiveClass: case Stmt::OMPBarrierDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: + case Stmt::OMPErrorDirectiveClass: case Stmt::OMPTaskgroupDirectiveClass: case Stmt::OMPFlushDirectiveClass: case Stmt::OMPDepobjDirectiveClass: diff --git a/clang/test/OpenMP/error_ast_print.cpp b/clang/test/OpenMP/error_ast_print.cpp new file 
mode 100644 index 0000000000000..fbdf68a11634d --- /dev/null +++ b/clang/test/OpenMP/error_ast_print.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo() {} +// CHECK: template int tmain(T argc, char **argv) +// CHECK: static int a; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: a = argv[0][0]; +// CHECK-NEXT: ++a; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: { +// CHECK-NEXT: int b = 10; +// CHECK-NEXT: T c = 100; +// CHECK-NEXT: a = b + c; +// CHECK-NEXT: } +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: foo(); +// CHECK-NEXT: return N; + +template +int tmain(T argc, char **argv) { + T b = argc, c, d, e, f, g; + static int a; +#pragma omp error + a = argv[0][0]; + ++a; +#pragma omp error + { + int b = 10; + T c = 100; + a = b + c; + } +#pragma omp error + foo(); +return N; +} + +// CHECK: int main(int argc, char **argv) +// CHECK-NEXT: int b = argc, c, d, e, f, g; +// CHECK-NEXT: static int a; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: a = 2; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: foo(); +int main (int argc, char **argv) { + int b = argc, c, d, e, f, g; + static int a; +#pragma omp error + a=2; +#pragma omp error + foo(); +} +#endif diff --git a/clang/test/OpenMP/error_message.cpp b/clang/test/OpenMP/error_message.cpp new file mode 100644 index 0000000000000..3f5a4cc243eb3 --- /dev/null +++ b/clang/test/OpenMP/error_message.cpp @@ -0,0 +1,114 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s 
-Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized + +template +T tmain(T argc) { + if (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + if (argc) { +#pragma omp error + } + while (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + while (argc) { +#pragma omp error + } + do +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + while (argc) + ; + do { +#pragma omp error + } while (argc); + switch (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: { +#pragma omp error + } + switch (argc) { +#pragma omp error + case 1: +#pragma omp error + break; + default: { +#pragma omp error + } break; + } + for (;;) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + for (;;) { +#pragma omp error + } +label: +#pragma omp error +label1 : { +#pragma omp error +} +if (1) + label2: +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + + return T(); +} + +int main(int argc, char **argv) { +#pragma omp error + ; +#pragma omp error untied // expected-error {{unexpected OpenMP clause 'untied' in directive '#pragma omp error'}} +#pragma omp error unknown // expected-warning {{extra tokens at the end of '#pragma omp error' are ignored}} + if (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + if (argc) { +#pragma omp error + } + while (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + while (argc) { +#pragma omp error + } + do +#pragma omp error // expected-error {{'#pragma omp error' cannot be an 
immediate substatement}} + while (argc) + ; + do { +#pragma omp error + } while (argc); + switch (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: { +#pragma omp error + } + switch (argc) { +#pragma omp error + case 1: +#pragma omp error + break; + default: { +#pragma omp error + } break; + } + for (;;) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + for (;;) { +#pragma omp error + } +label: +#pragma omp error +label1 : { +#pragma omp error +} +if (1) + label2: +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + + return tmain(argc); +} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 728625ceb023f..ee6773531cfda 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2164,6 +2164,7 @@ class EnqueueVisitor : public ConstStmtVisitor { void VisitOMPTaskyieldDirective(const OMPTaskyieldDirective *D); void VisitOMPBarrierDirective(const OMPBarrierDirective *D); void VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *D); + void VisitOMPErrorDirective(const OMPErrorDirective *D); void VisitOMPTaskgroupDirective(const OMPTaskgroupDirective *D); void VisitOMPCancellationPointDirective(const OMPCancellationPointDirective *D); @@ -3114,6 +3115,10 @@ void EnqueueVisitor::VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *D) { VisitOMPExecutableDirective(D); } +void EnqueueVisitor::VisitOMPErrorDirective(const OMPErrorDirective *D) { + VisitOMPExecutableDirective(D); +} + void EnqueueVisitor::VisitOMPTaskgroupDirective( const OMPTaskgroupDirective *D) { VisitOMPExecutableDirective(D); @@ -5819,6 +5824,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return 
cxstring::createRef("OMPBarrierDirective"); case CXCursor_OMPTaskwaitDirective: return cxstring::createRef("OMPTaskwaitDirective"); + case CXCursor_OMPErrorDirective: + return cxstring::createRef("OMPErrorDirective"); case CXCursor_OMPTaskgroupDirective: return cxstring::createRef("OMPTaskgroupDirective"); case CXCursor_OMPFlushDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index a90f414663ffe..3eda1e3e38d85 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -712,6 +712,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPTaskwaitDirectiveClass: K = CXCursor_OMPTaskwaitDirective; break; + case Stmt::OMPErrorDirectiveClass: + K = CXCursor_OMPErrorDirective; + break; case Stmt::OMPTaskgroupDirectiveClass: K = CXCursor_OMPTaskgroupDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 0cf9d97317184..6de8a3ea93f1e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -526,6 +526,7 @@ def OMP_Critical : Directive<"critical"> { } def OMP_TaskYield : Directive<"taskyield"> {} def OMP_Barrier : Directive<"barrier"> {} +def OMP_Error : Directive<"error"> {} def OMP_TaskWait : Directive<"taskwait"> { let allowedClauses = [ VersionedClause From cdde2706cf4a78e5a404ae160323cabd7839fbe4 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Wed, 2 Nov 2022 14:53:51 -0700 Subject: [PATCH 089/516] [libc++][Android] XFAIL aligned_alloc and timespec_get tests Mark tests XFAIL that use APIs that are unsupported on old versions of Android: - aligned_alloc isn't available until API 28. - timespec_get isn't available until API 29. 
Reviewed By: ldionne, #libc Differential Revision: https://reviews.llvm.org/D137134 --- .../depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp | 3 +++ .../support.runtime/cstdlib.aligned_alloc.compile.pass.cpp | 3 +++ .../support.runtime/ctime.timespec.compile.pass.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp index e2565641aba66..a02a5bbbdbcc7 100644 --- a/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp @@ -18,6 +18,9 @@ // ::aligned_alloc is not implemented on Windows // XFAIL: target={{.+}}-windows-{{.+}} +// ::aligned_alloc is available starting with Android P (API 28) +// XFAIL: target={{.+}}-android{{(eabi)?(21|22|23|24|25|26|27)}} + #include #include diff --git a/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp index 70bf3ecd539f0..f6681db6cf6bb 100644 --- a/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp @@ -18,6 +18,9 @@ // ::aligned_alloc is not implemented on Windows // XFAIL: target={{.+}}-windows-{{.+}} +// ::aligned_alloc is available starting with Android P (API 28) +// XFAIL: target={{.+}}-android{{(eabi)?(21|22|23|24|25|26|27)}} + #include #include diff --git a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp index 111d0912cbbe2..37c7b67234782 100644 --- a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp +++ 
b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp @@ -17,6 +17,9 @@ // unavailable until macOS 10.15 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} +// ::timespec_get is available starting with Android Q (API 29) +// XFAIL: target={{.+}}-android{{(eabi)?(21|22|23|24|25|26|27|28)}} + #include #include From 9e6049527f9b5975a456d44afcdc4cec79a2bec6 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Wed, 2 Nov 2022 14:55:56 -0700 Subject: [PATCH 090/516] [libc++][Android] strong_order_long_double.verify.cpp: disable on i686 This target (as well as 32-bit ARM Android) has sizeof(long double) equal to sizeof(double). Reviewed By: ldionne, #libc Differential Revision: https://reviews.llvm.org/D137135 --- .../cmp/cmp.alg/strong_order_long_double.verify.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp index 25019711a71eb..1a9289c7c966c 100644 --- a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp +++ b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp @@ -17,6 +17,8 @@ // ARM/AArch64 MinGW also has got long double equal to regular double, just // like MSVC (thus match both MinGW and MSVC here, for those architectures). // UNSUPPORTED: target={{aarch64|armv7}}-{{.*}}-windows-{{.+}} +// Android's 32-bit x86 target has long double equal to regular double. +// UNSUPPORTED: target=i686-{{.+}}-android{{.*}} // From f970b007e55d6dab6d84d98a39658a58019eb06e Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 2 Nov 2022 22:34:05 +0000 Subject: [PATCH 091/516] [ARM] Fix vector ule zero lowering The instruction icmp ule <4 x i32> %0, zeroinitializer will usually be simplified to icmp eq <4 x i32> %0, zeroinitializer. 
It is not guaranteed though, and the code for lowering vector compares could pick the wrong form of the instruction if this happened. I've tried to make the code more explicit about the supported conditions. This fixes NEON being unable to select VCMPZ with HS conditions, and fixes some incorrect MVE patterns. Fixes #58514. Differential Revision: https://reviews.llvm.org/D136447 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 18 +++++++------- llvm/test/CodeGen/ARM/vcmpz.ll | 30 ++++++++++++++++-------- llvm/test/CodeGen/Thumb2/mve-pred-and.ll | 3 ++- llvm/test/CodeGen/Thumb2/mve-pred-or.ll | 3 ++- llvm/test/CodeGen/Thumb2/mve-pred-xor.ll | 3 ++- llvm/test/CodeGen/Thumb2/mve-vcmpz.ll | 18 +++++++++----- 6 files changed, 47 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index c84fe4d661974..b822f15ed193b 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -6855,25 +6855,25 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, // If one of the operands is a constant vector zero, attempt to fold the // comparison to a specialized compare-against-zero form. 
- SDValue SingleOp; - if (ISD::isBuildVectorAllZeros(Op1.getNode())) - SingleOp = Op0; - else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { + if (ISD::isBuildVectorAllZeros(Op0.getNode()) && + (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ || + Opc == ARMCC::NE)) { if (Opc == ARMCC::GE) Opc = ARMCC::LE; else if (Opc == ARMCC::GT) Opc = ARMCC::LT; - SingleOp = Op1; + std::swap(Op0, Op1); } SDValue Result; - if (SingleOp.getNode()) { - Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp, + if (ISD::isBuildVectorAllZeros(Op1.getNode()) && + (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE || + Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ)) + Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0, DAG.getConstant(Opc, dl, MVT::i32)); - } else { + else Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, DAG.getConstant(Opc, dl, MVT::i32)); - } Result = DAG.getSExtOrTrunc(Result, dl, VT); diff --git a/llvm/test/CodeGen/ARM/vcmpz.ll b/llvm/test/CodeGen/ARM/vcmpz.ll index f800346a6b564..51b5d28d8192e 100644 --- a/llvm/test/CodeGen/ARM/vcmpz.ll +++ b/llvm/test/CodeGen/ARM/vcmpz.ll @@ -174,11 +174,16 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ult(<4 x i32> %0) { ret <4 x i32> %3 } -;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) { -; %2 = icmp ule <4 x i32> %0, zeroinitializer -; %3 = sext <4 x i1> %2 to <4 x i32> -; ret <4 x i32> %3 -;} +define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) { +; CHECK-LABEL: vcmpz_zr_ule: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vcge.u32 q0, q8, q0 +; CHECK-NEXT: bx lr + %2 = icmp ule <4 x i32> %0, zeroinitializer + %3 = sext <4 x i1> %2 to <4 x i32> + ret <4 x i32> %3 +} define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ugt(<4 x i32> %0) { ; CHECK-LABEL: vcmpz_zr_ugt: @@ -294,8 +299,13 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_ugt(<4 x i32> %0) { ret <4 x i32> %3 } -;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) { -; %2 = icmp uge <4 x i32> 
zeroinitializer, %0 -; %3 = sext <4 x i1> %2 to <4 x i32> -; ret <4 x i32> %3 -;} +define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) { +; CHECK-LABEL: vcmpz_zl_uge: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vcge.u32 q0, q8, q0 +; CHECK-NEXT: bx lr + %2 = icmp uge <4 x i32> zeroinitializer, %0 + %3 = sext <4 x i1> %2 to <4 x i32> + ret <4 x i32> %3 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll index e745fafdbea72..e8d5eadabf7f9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll @@ -122,8 +122,9 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vpt.i32 eq, q0, zr -; CHECK-NEXT: vcmpt.u32 cs, q1, zr +; CHECK-NEXT: vcmpt.u32 cs, q2, q1 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll index cb3f554e21b0a..435ddf0a6e57b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll @@ -123,7 +123,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q1, zr +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q2, q1 ; CHECK-NEXT: vpnot ; CHECK-NEXT: vpst ; CHECK-NEXT: vcmpt.i32 ne, q0, zr diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll index e5fef332034fe..0ff262e6b53ab 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -151,7 +151,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q1, zr +; CHECK-NEXT: vmov.i32 q2, 
#0x0 +; CHECK-NEXT: vcmp.u32 cs, q2, q1 ; CHECK-NEXT: vmrs r0, p0 ; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vmrs r1, p0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll index fcb9d136307fe..aaf49c76a07a0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll @@ -110,7 +110,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: vcmp_ulez_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -229,7 +230,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: vcmp_ulez_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u16 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u16 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -348,7 +350,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: vcmp_ulez_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u8 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -489,7 +492,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: vcmp_r_ugez_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -608,7 +612,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: vcmp_r_ugez_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u16 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u16 
cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -727,7 +732,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: vcmp_r_ugez_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u8 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: From 1ceafe5e0f694797dab3b44a93ac8b098739d47f Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 2 Nov 2022 20:19:04 +0000 Subject: [PATCH 092/516] [libc] Add implementation of ungetc. A bug in the file read logic has also been fixed along the way. Parts of the ungetc tests will fail without that bug fixed. Reviewed By: michaelrj Differential Revision: https://reviews.llvm.org/D137286 --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/spec/stdc.td | 5 ++ libc/src/__support/File/file.cpp | 44 +++++++++++++++++- libc/src/__support/File/file.h | 8 ++++ libc/src/stdio/CMakeLists.txt | 12 +++++ libc/src/stdio/ungetc.cpp | 20 ++++++++ libc/src/stdio/ungetc.h | 20 ++++++++ libc/test/src/stdio/CMakeLists.txt | 16 +++++++ libc/test/src/stdio/ungetc_test.cpp | 59 ++++++++++++++++++++++++ 9 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 libc/src/stdio/ungetc.cpp create mode 100644 libc/src/stdio/ungetc.h create mode 100644 libc/test/src/stdio/ungetc_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 7b0fb53451004..17f2c994c12fb 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -396,6 +396,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.stderr libc.src.stdio.stdin libc.src.stdio.stdout + libc.src.stdio.ungetc # stdlib.h entrypoints libc.src.stdlib._Exit diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index a7a9df46747f4..69a3ac1e26d8b 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -641,6 +641,11 @@ def StdC : 
StandardSpec<"stdc"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "ungetc", + RetValSpec, + [ArgSpec, ArgSpec] + >, ], [ ObjectSpec< diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index b5d00b7a876da..352b1a4d24005 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -203,10 +203,14 @@ size_t File::read_unlocked(void *data, size_t len) { for (size_t i = 0; i < available_data; ++i) dataref[i] = bufref[i + pos]; read_limit = pos = 0; // Reset the pointers. + // Update the dataref to reflect the fact that we have already + // copied |available_data| into |data|. + dataref = cpp::span(dataref.data() + available_data, + dataref.size() - available_data); size_t to_fetch = len - available_data; if (to_fetch > bufsize) { - size_t fetched_size = platform_read(this, data, to_fetch); + size_t fetched_size = platform_read(this, dataref.data(), to_fetch); if (fetched_size < to_fetch) { if (errno == 0) eof = true; @@ -233,6 +237,44 @@ size_t File::read_unlocked(void *data, size_t len) { return transfer_size + available_data; } +int File::ungetc_unlocked(int c) { + // There is no meaning to unget if: + // 1. You are trying to push back EOF. + // 2. Read operations are not allowed on this file. + // 3. The previous operation was a write operation. + if (c == EOF || !read_allowed() || (prev_op == FileOp::WRITE)) + return EOF; + + cpp::span bufref(static_cast(buf), bufsize); + if (read_limit == 0) { + // If |read_limit| is zero, it can mean three things: + // a. This file was just created. + // b. The previous operation was a seek operation. + // c. The previous operation was a read operation which emptied + // the buffer. + // For all the above cases, we simply write |c| at the beginning + // of the buffer and bump |read_limit|. Note that |pos| will also + // be zero in this case, so we don't need to adjust it.
+ bufref[0] = static_cast(c); + ++read_limit; + } else { + // If |read_limit| is non-zero, it means that there is data in the buffer + // from a previous read operation. Which would also mean that |pos| is not + // zero. So, we decrement |pos| and write |c| into the buffer at the new + // |pos|. If too many ungetc operations are performed without reads, it + // can lead to (pos == 0 but read_limit != 0). We will just error out in + // such a case. + if (pos == 0) + return EOF; + --pos; + bufref[pos] = static_cast(c); + } + + eof = false; // There is at least one character that can be read now. + err = false; // This operation was a success. + return c; +} + int File::seek(long offset, int whence) { FileLock lock(this); if (prev_op == FileOp::WRITE && pos > 0) { diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index 74655b1301b85..7ea780d94f555 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -187,6 +187,14 @@ class File { int flush_unlocked(); + // Returns EOF on error and keeps the file unchanged. + int ungetc_unlocked(int c); + + int ungetc(int c) { + FileLock lock(this); + return ungetc_unlocked(c); + } + // Sets the internal buffer to |buffer| with buffering mode |mode|. // |size| is the size of |buffer|. This new |buffer| is owned by the // stream only if |owned| is true.
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 22536a515bd58..f8b197d984c52 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -282,6 +282,18 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ungetc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( fopencookie SRCS diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/ungetc.cpp new file mode 100644 index 0000000000000..de6ce0ba0683d --- /dev/null +++ b/libc/src/stdio/ungetc.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of ungetc ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ungetc.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) { + return reinterpret_cast<__llvm_libc::File *>(stream)->ungetc(c); +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/ungetc.h b/libc/src/stdio/ungetc.h new file mode 100644 index 0000000000000..b5b7acb5962c1 --- /dev/null +++ b/libc/src/stdio/ungetc.h @@ -0,0 +1,20 @@ +//===-- Implementation header of ungetc -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_UNGETC_H +#define LLVM_LIBC_SRC_STDIO_UNGETC_H + +#include + +namespace __llvm_libc { + +int ungetc(int c, ::FILE *stream); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_UNGETC_H diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 515619e2aa822..904c669d63da6 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -21,6 +21,22 @@ add_libc_unittest( libc.src.stdio.fwrite ) +add_libc_unittest( + ungetc_test + SUITE + libc_stdio_unittests + SRCS + ungetc_test.cpp + DEPENDS + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fseek + libc.src.stdio.fwrite + libc.src.stdio.ungetc +) + add_libc_unittest( unlocked_fileop_test SUITE diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp new file mode 100644 index 0000000000000..0102be7b2e0fb --- /dev/null +++ b/libc/test/src/stdio/ungetc_test.cpp @@ -0,0 +1,59 @@ +//===-- Unittests for ungetc ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fseek.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/ungetc.h" +#include "utils/UnitTest/Test.h" + +#include + +TEST(LlvmLibcUngetcTest, UngetAndReadBack) { + constexpr char FILENAME[] = "testdata/ungetc_test.test"; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(file == nullptr); + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file)); + // Cannot unget to an un-readable file. + ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file)); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(FILENAME, "r+"); + ASSERT_FALSE(file == nullptr); + char c; + ASSERT_EQ(__llvm_libc::fread(&c, 1, 1, file), size_t(1)); + ASSERT_EQ(c, CONTENT[0]); + ASSERT_EQ(__llvm_libc::ungetc(int(c), file), int(c)); + + char data[CONTENT_SIZE]; + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, file)); + ASSERT_STREQ(CONTENT, data); + + ASSERT_EQ(0, __llvm_libc::fseek(file, 0, SEEK_SET)); + // ungetc should not fail after a seek operation. + int unget_char = 'z'; + ASSERT_EQ(unget_char, __llvm_libc::ungetc(unget_char, file)); + // Another unget should fail. + ASSERT_EQ(EOF, __llvm_libc::ungetc(unget_char, file)); + // ungetting a char at the beginning of the file will allow us to fetch + // one additional character. + char new_data[CONTENT_SIZE + 1]; + ASSERT_EQ(CONTENT_SIZE + 1, + __llvm_libc::fread(new_data, 1, CONTENT_SIZE + 1, file)); + ASSERT_STREQ("zabcdef", new_data); + + ASSERT_EQ(size_t(1), __llvm_libc::fwrite("x", 1, 1, file)); + // unget should fail after a write operation. 
+ ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file)); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); +} From 0efff7cdcb26917b9acd1a280911317ea12dc937 Mon Sep 17 00:00:00 2001 From: electriclilies Date: Wed, 2 Nov 2022 15:56:28 -0700 Subject: [PATCH 093/516] [mlir] Add call_intrinsic op to LLVMIIR The call_intrinsic op allows us to call LLVM intrinsics from the LLVMDialect without implementing a new op every time. Reviewed By: lattner, rriddle Differential Revision: https://reviews.llvm.org/D137187 --- .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 19 +++++ .../LLVMIR/LLVMToLLVMIRTranslation.cpp | 72 +++++++++++++++- mlir/test/Dialect/LLVMIR/call-intrin.mlir | 82 +++++++++++++++++++ 3 files changed, 169 insertions(+), 4 deletions(-) create mode 100644 mlir/test/Dialect/LLVMIR/call-intrin.mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index b2257a163932c..bb2668790dbfb 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -707,6 +707,25 @@ def LLVM_vector_extract }]; } +//===--------------------------------------------------------------------===// +// CallIntrinsicOp +//===--------------------------------------------------------------------===// +def LLVM_CallIntrinsicOp : LLVM_Op<"call_intrinsic", [Pure]> { + let summary = "Call to an LLVM intrinsic function."; + let description = [{ + Call the specified llvm intrinsic. If the intrinsic is overloaded, use + the MLIR function type of this op to determine which intrinsic to call. + }]; + let arguments = (ins StrAttr:$intrin, Variadic:$args); + let results = (outs Variadic:$results); + let llvmBuilder = [{ + return convertCallLLVMIntrinsicOp(op, builder, moduleTranslation); + }]; + let assemblyFormat = [{ + $intrin `(` $args `)` `:` functional-type($args, $results) attr-dict + }]; +} + // // LLVM Vector Predication operations. 
// diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index 1f89a55ee363e..abc2fadbbc9ac 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -258,6 +258,70 @@ static SmallVector extractPosition(ArrayRef indices) { return position; } +/// Get the declaration of an overloaded llvm intrinsic. First we get the +/// overloaded argument types and/or result type from the CallIntrinsicOp, and +/// then use those to get the correct declaration of the overloaded intrinsic. +static FailureOr +getOverloadedDeclaration(CallIntrinsicOp &op, llvm::Intrinsic::ID id, + llvm::Module *module, + LLVM::ModuleTranslation &moduleTranslation) { + SmallVector allArgTys; + for (Type type : op->getOperandTypes()) + allArgTys.push_back(moduleTranslation.convertType(type)); + + llvm::Type *resTy; + if (op.getNumResults() == 0) + resTy = llvm::Type::getVoidTy(module->getContext()); + else + resTy = moduleTranslation.convertType(op.getResult(0).getType()); + + // ATM we do not support variadic intrinsics. 
+ llvm::FunctionType *ft = llvm::FunctionType::get(resTy, allArgTys, false); + + SmallVector table; + getIntrinsicInfoTableEntries(id, table); + ArrayRef tableRef = table; + + SmallVector overloadedArgTys; + if (llvm::Intrinsic::matchIntrinsicSignature(ft, tableRef, + overloadedArgTys) != + llvm::Intrinsic::MatchIntrinsicTypesResult::MatchIntrinsicTypes_Match) { + return op.emitOpError("intrinsic type is not a match"); + } + + ArrayRef overloadedArgTysRef = overloadedArgTys; + return llvm::Intrinsic::getDeclaration(module, id, overloadedArgTysRef); +} + +/// Builder for LLVM_CallIntrinsicOp +static LogicalResult +convertCallLLVMIntrinsicOp(CallIntrinsicOp &op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::Module *module = builder.GetInsertBlock()->getModule(); + llvm::Intrinsic::ID id = + llvm::Function::lookupIntrinsicID(op.getIntrinAttr()); + if (!id) + return op.emitOpError() + << "couldn't find intrinsic: " << op.getIntrinAttr(); + + llvm::Function *fn = nullptr; + if (llvm::Intrinsic::isOverloaded(id)) { + auto fnOrFailure = + getOverloadedDeclaration(op, id, module, moduleTranslation); + if (failed(fnOrFailure)) + return failure(); + fn = fnOrFailure.value(); + } else { + fn = llvm::Intrinsic::getDeclaration(module, id, {}); + } + + auto *inst = + builder.CreateCall(fn, moduleTranslation.lookupValues(op.getOperands())); + if (op.getNumResults() == 1) + moduleTranslation.mapValue(op->getResults().front()) = inst; + return success(); +} + static LogicalResult convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -272,8 +336,8 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, // Emit function calls. If the "callee" attribute is present, this is a // direct function call and we also need to look up the remapped function // itself. Otherwise, this is an indirect call and the callee is the first - // operand, look it up as a normal value. 
Return the llvm::Value representing - // the function result, which may be of llvm::VoidTy type. + // operand, look it up as a normal value. Return the llvm::Value + // representing the function result, which may be of llvm::VoidTy type. auto convertCall = [&](Operation &op) -> llvm::Value * { auto operands = moduleTranslation.lookupValues(op.getOperands()); ArrayRef operandsRef(operands); @@ -404,8 +468,8 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } - // Emit branches. We need to look up the remapped blocks and ignore the block - // arguments that were transformed into PHI nodes. + // Emit branches. We need to look up the remapped blocks and ignore the + // block arguments that were transformed into PHI nodes. if (auto brOp = dyn_cast(opInst)) { llvm::BranchInst *branch = builder.CreateBr(moduleTranslation.lookupBlock(brOp.getSuccessor())); diff --git a/mlir/test/Dialect/LLVMIR/call-intrin.mlir b/mlir/test/Dialect/LLVMIR/call-intrin.mlir new file mode 100644 index 0000000000000..30f5c9fb82572 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/call-intrin.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK: source_filename = "LLVMDialectModule" +// CHECK: declare ptr @malloc(i64) +// CHECK: declare void @free(ptr) +// CHECK: define <4 x float> @round_sse41() { +// CHECK: %1 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> , <4 x float> , i32 1) +// CHECK: ret <4 x float> %1 +// CHECK: } +llvm.func @round_sse41() -> vector<4xf32> { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(dense<0.2> : vector<4xf32>) : vector<4xf32> + %res = llvm.call_intrinsic "llvm.x86.sse41.round.ss"(%1, %1, %0) : (vector<4xf32>, vector<4xf32>, i32) -> vector<4xf32> {} + llvm.return %res: vector<4xf32> +} + +// ----- + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK: source_filename = 
"LLVMDialectModule" + +// CHECK: declare ptr @malloc(i64) + +// CHECK: declare void @free(ptr) + +// CHECK: define float @round_overloaded() { +// CHECK: %1 = call float @llvm.round.f32(float 1.000000e+00) +// CHECK: ret float %1 +// CHECK: } +llvm.func @round_overloaded() -> f32 { + %0 = llvm.mlir.constant(1.0 : f32) : f32 + %res = llvm.call_intrinsic "llvm.round"(%0) : (f32) -> f32 {} + llvm.return %res: f32 +} + +// ----- + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK: source_filename = "LLVMDialectModule" +// CHECK: declare ptr @malloc(i64) +// CHECK: declare void @free(ptr) +// CHECK: define void @lifetime_start() { +// CHECK: %1 = alloca float, i8 1, align 4 +// CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %1) +// CHECK: ret void +// CHECK: } +llvm.func @lifetime_start() { + %0 = llvm.mlir.constant(4 : i64) : i64 + %1 = llvm.mlir.constant(1 : i8) : i8 + %2 = llvm.alloca %1 x f32 : (i8) -> !llvm.ptr + llvm.call_intrinsic "llvm.lifetime.start"(%0, %2) : (i64, !llvm.ptr) -> () {} + llvm.return +} + +// ----- + +llvm.func @variadic() { + %0 = llvm.mlir.constant(1 : i8) : i8 + %1 = llvm.alloca %0 x f32 : (i8) -> !llvm.ptr + llvm.call_intrinsic "llvm.localescape"(%1, %1) : (!llvm.ptr, !llvm.ptr) -> () + llvm.return +} + +// ----- + +llvm.func @no_intrinsic() { + // expected-error@below {{'llvm.call_intrinsic' op couldn't find intrinsic: "llvm.does_not_exist"}} + // expected-error@below {{LLVM Translation failed for operation: llvm.call_intrinsic}} + llvm.call_intrinsic "llvm.does_not_exist"() : () -> () + llvm.return +} + +// ----- + +llvm.func @bad_types() { + %0 = llvm.mlir.constant(1 : i8) : i8 + // expected-error@below {{'llvm.call_intrinsic' op intrinsic type is not a match}} + // expected-error@below {{LLVM Translation failed for operation: llvm.call_intrinsic}} + llvm.call_intrinsic "llvm.round"(%0) : (i8) -> i8 {} + llvm.return +} From 4fed59ed41007c2380d6742f6a91178e2f35fa01 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Nov 
2022 13:17:30 -0700 Subject: [PATCH 094/516] FunctionLoweringInfo: Use TLI member instead of finding it --- llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b1e369d21887e..bf67ef99b6b43 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -369,8 +369,7 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { - return RegInfo->createVirtualRegister( - MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); + return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT, isDivergent)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -381,8 +380,6 @@ Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { /// will assign registers for each member or element. 
/// Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { - const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - SmallVector ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); From cbce11c42266fd2f61bc52d427f1c2f3d0290c5a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Nov 2022 13:44:57 -0700 Subject: [PATCH 095/516] WebAssembly: Move exception handling code together --- llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index bf67ef99b6b43..3e59d0d2b753d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -119,10 +119,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } } - if (Personality == EHPersonality::Wasm_CXX) { - WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); - calculateWasmEHInfo(&fn, EHInfo); - } // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines @@ -323,10 +319,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, const auto *BB = CME.Handler.get(); CME.Handler = MBBMap[BB]; } - } - - else if (Personality == EHPersonality::Wasm_CXX) { + } else if (Personality == EHPersonality::Wasm_CXX) { WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + calculateWasmEHInfo(&fn, EHInfo); + // Map all BB references in the Wasm EH data to MBBs. DenseMap SrcToUnwindDest; for (auto &KV : EHInfo.SrcToUnwindDest) { From 1179bdf300258d4e6eb90b23aeeae89950d922c5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Nov 2022 14:29:14 -0700 Subject: [PATCH 096/516] WebAssembly: Remove unnecessary set check The empty set will be default constructed if this wasn't in the map already. 
--- llvm/include/llvm/CodeGen/WasmEHFuncInfo.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h index 8b55a45b61e8b..60ee6493b1a1f 100644 --- a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h +++ b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h @@ -50,8 +50,6 @@ struct WasmEHFuncInfo { } void setUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) { SrcToUnwindDest[BB] = Dest; - if (!UnwindDestToSrcs.count(Dest)) - UnwindDestToSrcs[Dest] = SmallPtrSet(); UnwindDestToSrcs[Dest].insert(BB); } bool hasUnwindDest(const BasicBlock *BB) const { @@ -76,8 +74,6 @@ struct WasmEHFuncInfo { } void setUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) { SrcToUnwindDest[MBB] = Dest; - if (!UnwindDestToSrcs.count(Dest)) - UnwindDestToSrcs[Dest] = SmallPtrSet(); UnwindDestToSrcs[Dest].insert(MBB); } bool hasUnwindDest(MachineBasicBlock *MBB) const { From 48732d3541df14f6206b0e93f0e08b4eabd1176c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 29 Oct 2022 12:52:24 -0700 Subject: [PATCH 097/516] SPARC: Register null target streamer Fixes null dereference in emitFunctionBodyStart for 64-bit --- llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 7 +++++++ llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h | 4 ++-- llvm/test/CodeGen/SPARC/64bit.ll | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index b11c786e7856d..d6688c31334c2 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -84,6 +84,10 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, return new SparcTargetAsmStreamer(S, OS); } +static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) { + return new SparcTargetStreamer(S); +} + static MCInstPrinter 
*createSparcMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -122,6 +126,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTargetMC() { // Register the asm streamer. TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer); + // Register the null streamer. + TargetRegistry::RegisterNullTargetStreamer(*T, createNullTargetStreamer); + // Register the MCInstPrinter TargetRegistry::RegisterMCInstPrinter(*T, createSparcMCInstPrinter); } diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h index 27976d166067b..ef28afa06bffb 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h @@ -22,9 +22,9 @@ class SparcTargetStreamer : public MCTargetStreamer { public: SparcTargetStreamer(MCStreamer &S); /// Emit ".register , #ignore". - virtual void emitSparcRegisterIgnore(unsigned reg) = 0; + virtual void emitSparcRegisterIgnore(unsigned reg){}; /// Emit ".register , #scratch". 
- virtual void emitSparcRegisterScratch(unsigned reg) = 0; + virtual void emitSparcRegisterScratch(unsigned reg){}; }; // This part is for ascii assembly output diff --git a/llvm/test/CodeGen/SPARC/64bit.ll b/llvm/test/CodeGen/SPARC/64bit.ll index c61476eb2265d..c079d901a03d1 100644 --- a/llvm/test/CodeGen/SPARC/64bit.ll +++ b/llvm/test/CodeGen/SPARC/64bit.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=sparcv9 -mattr=+popc -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s ; RUN: llc < %s -march=sparcv9 -mattr=+popc | FileCheck %s -check-prefix=OPT +; RUN: llc %s -march=sparcv9 -mattr=+popc -filetype=null ; CHECK-LABEL: ret2: ; CHECK: mov %i1, %i0 From 40e99473170f5045e0b5f2cafabd2a1be8c7ec26 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Wed, 2 Nov 2022 11:54:19 -0700 Subject: [PATCH 098/516] [Clang] follow-up D128745, remove ClangABICompat checks Per discussions in D128745, remove ClangABICompat checks for implementations of DR692/DR1395/DR1432. This is a potentially breaking changes, so the release note is updated accordingly. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D136120 --- clang/docs/ReleaseNotes.rst | 23 ++- clang/lib/Sema/SemaTemplateDeduction.cpp | 157 ++++++++---------- clang/test/CodeGen/partial-order-variadic.cpp | 33 ---- clang/test/SemaCXX/pre-dr692.cpp | 14 -- 4 files changed, 85 insertions(+), 142 deletions(-) delete mode 100644 clang/test/CodeGen/partial-order-variadic.cpp delete mode 100644 clang/test/SemaCXX/pre-dr692.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1198926974bff..435d9ded7c72e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -161,6 +161,21 @@ code bases. - The ``-fexperimental-new-pass-manager`` and ``-fno-legacy-pass-manager`` flags have been removed. These have been no-ops since 15.0.0. 
+- As a side effect of implementing DR692/DR1395/DR1432, Clang now rejects some + overloaded function templates as ambiguous when one of the candidates has a + trailing parameter pack. + + .. code-block:: c++ + + template void g(T, T = T()); + template void g(T, U...); + void h() { + // This is rejected due to ambiguity between the pack and the + // default argument. Only parameters with arguments are considered during + // partial ordering of function templates. + g(42); + } + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed @@ -551,10 +566,10 @@ C2x Feature Support C++ Language Changes in Clang ----------------------------- -- Implemented DR692, DR1395 and DR1432. Use the ``-fclang-abi-compat=15`` option - to get the old partial ordering behavior regarding packs. Note that the fix for - DR1432 is speculative that there is no wording or even resolution for this issue. - A speculative fix for DR1432 is needed because it fixes regressions caused by DR692. +- Implemented `DR692 `_, `DR1395 `_, + and `DR1432 `_. The fix for DR1432 is speculative since the + issue is still open and has no proposed resolution at this time. A speculative fix + for DR1432 is needed to prevent regressions that would otherwise occur due to DR692. - Clang's default C++/ObjC++ standard is now ``gnu++17`` instead of ``gnu++14``. This means Clang will by default accept code using features from C++17 and conforming GNU extensions. 
Projects incompatible with C++17 can add diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 17c0e2f04f4bd..3db06a51e4eb7 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1128,9 +1128,7 @@ DeduceTemplateArguments(Sema &S, // During partial ordering, if Ai was originally a function parameter pack: // - if P does not contain a function parameter type corresponding to Ai then // Ai is ignored; - bool ClangABICompat15 = S.Context.getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver15; - if (!ClangABICompat15 && PartialOrdering && ArgIdx + 1 == NumArgs && + if (PartialOrdering && ArgIdx + 1 == NumArgs && isa(Args[ArgIdx])) return Sema::TDK_Success; @@ -2466,9 +2464,6 @@ static bool isSameTemplateArg(ASTContext &Context, if (X.getKind() != Y.getKind()) return false; - bool ClangABICompat15 = - Context.getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver15; - switch (X.getKind()) { case TemplateArgument::Null: llvm_unreachable("Comparing NULL template argument"); @@ -2500,45 +2495,33 @@ static bool isSameTemplateArg(ASTContext &Context, return XID == YID; } - case TemplateArgument::Pack: - if (ClangABICompat15) { - if (X.pack_size() != Y.pack_size()) + case TemplateArgument::Pack: { + unsigned PackIterationSize = X.pack_size(); + if (X.pack_size() != Y.pack_size()) { + if (!PartialOrdering) return false; - for (TemplateArgument::pack_iterator XP = X.pack_begin(), - XPEnd = X.pack_end(), - YP = Y.pack_begin(); - XP != XPEnd; ++XP, ++YP) - if (!isSameTemplateArg(Context, *XP, *YP, PartialOrdering, - PackExpansionMatchesPack)) - return false; - } else { - unsigned PackIterationSize = X.pack_size(); - if (X.pack_size() != Y.pack_size()) { - if (!PartialOrdering) - return false; - - // C++0x [temp.deduct.type]p9: - // During partial ordering, if Ai was originally a pack expansion: - // - if P does not contain a template argument corresponding to Ai 
- // then Ai is ignored; - bool XHasMoreArg = X.pack_size() > Y.pack_size(); - if (!(XHasMoreArg && X.pack_elements().back().isPackExpansion()) && - !(!XHasMoreArg && Y.pack_elements().back().isPackExpansion())) - return false; - - if (XHasMoreArg) - PackIterationSize = Y.pack_size(); - } + // C++0x [temp.deduct.type]p9: + // During partial ordering, if Ai was originally a pack expansion: + // - if P does not contain a template argument corresponding to Ai + // then Ai is ignored; + bool XHasMoreArg = X.pack_size() > Y.pack_size(); + if (!(XHasMoreArg && X.pack_elements().back().isPackExpansion()) && + !(!XHasMoreArg && Y.pack_elements().back().isPackExpansion())) + return false; - ArrayRef XP = X.pack_elements(); - ArrayRef YP = Y.pack_elements(); - for (unsigned i = 0; i < PackIterationSize; ++i) - if (!isSameTemplateArg(Context, XP[i], YP[i], PartialOrdering, - PackExpansionMatchesPack)) - return false; + if (XHasMoreArg) + PackIterationSize = Y.pack_size(); } + + ArrayRef XP = X.pack_elements(); + ArrayRef YP = Y.pack_elements(); + for (unsigned i = 0; i < PackIterationSize; ++i) + if (!isSameTemplateArg(Context, XP[i], YP[i], PartialOrdering, + PackExpansionMatchesPack)) + return false; return true; + } } llvm_unreachable("Invalid TemplateArgument Kind!"); @@ -5245,34 +5228,30 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate( // This a speculative fix for CWG1432 (Similar to the fix for CWG1395) that // there is no wording or even resolution for this issue. 
- bool ClangABICompat15 = - Context.getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver15; - if (!ClangABICompat15) { - for (int i = 0, e = std::min(NumParams1, NumParams2); i < e; ++i) { - QualType T1 = FD1->getParamDecl(i)->getType().getCanonicalType(); - QualType T2 = FD2->getParamDecl(i)->getType().getCanonicalType(); - auto *TST1 = dyn_cast(T1); - auto *TST2 = dyn_cast(T2); - if (!TST1 || !TST2) - continue; - const TemplateArgument &TA1 = TST1->template_arguments().back(); - if (TA1.getKind() == TemplateArgument::Pack) { - assert(TST1->template_arguments().size() == - TST2->template_arguments().size()); - const TemplateArgument &TA2 = TST2->template_arguments().back(); - assert(TA2.getKind() == TemplateArgument::Pack); - unsigned PackSize1 = TA1.pack_size(); - unsigned PackSize2 = TA2.pack_size(); - bool IsPackExpansion1 = - PackSize1 && TA1.pack_elements().back().isPackExpansion(); - bool IsPackExpansion2 = - PackSize2 && TA2.pack_elements().back().isPackExpansion(); - if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { - if (PackSize1 > PackSize2 && IsPackExpansion1) - return FT2; - if (PackSize1 < PackSize2 && IsPackExpansion2) - return FT1; - } + for (int i = 0, e = std::min(NumParams1, NumParams2); i < e; ++i) { + QualType T1 = FD1->getParamDecl(i)->getType().getCanonicalType(); + QualType T2 = FD2->getParamDecl(i)->getType().getCanonicalType(); + auto *TST1 = dyn_cast(T1); + auto *TST2 = dyn_cast(T2); + if (!TST1 || !TST2) + continue; + const TemplateArgument &TA1 = TST1->template_arguments().back(); + if (TA1.getKind() == TemplateArgument::Pack) { + assert(TST1->template_arguments().size() == + TST2->template_arguments().size()); + const TemplateArgument &TA2 = TST2->template_arguments().back(); + assert(TA2.getKind() == TemplateArgument::Pack); + unsigned PackSize1 = TA1.pack_size(); + unsigned PackSize2 = TA2.pack_size(); + bool IsPackExpansion1 = + PackSize1 && TA1.pack_elements().back().isPackExpansion(); + bool 
IsPackExpansion2 = + PackSize2 && TA2.pack_elements().back().isPackExpansion(); + if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { + if (PackSize1 > PackSize2 && IsPackExpansion1) + return FT2; + if (PackSize1 < PackSize2 && IsPackExpansion2) + return FT1; } } } @@ -5618,29 +5597,25 @@ getMoreSpecialized(Sema &S, QualType T1, QualType T2, TemplateLikeDecl *P1, // This a speculative fix for CWG1432 (Similar to the fix for CWG1395) that // there is no wording or even resolution for this issue. - bool ClangABICompat15 = S.Context.getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver15; - if (!ClangABICompat15) { - auto *TST1 = cast(T1); - auto *TST2 = cast(T2); - const TemplateArgument &TA1 = TST1->template_arguments().back(); - if (TA1.getKind() == TemplateArgument::Pack) { - assert(TST1->template_arguments().size() == - TST2->template_arguments().size()); - const TemplateArgument &TA2 = TST2->template_arguments().back(); - assert(TA2.getKind() == TemplateArgument::Pack); - unsigned PackSize1 = TA1.pack_size(); - unsigned PackSize2 = TA2.pack_size(); - bool IsPackExpansion1 = - PackSize1 && TA1.pack_elements().back().isPackExpansion(); - bool IsPackExpansion2 = - PackSize2 && TA2.pack_elements().back().isPackExpansion(); - if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { - if (PackSize1 > PackSize2 && IsPackExpansion1) - return GetP2()(P1, P2); - if (PackSize1 < PackSize2 && IsPackExpansion2) - return P1; - } + auto *TST1 = cast(T1); + auto *TST2 = cast(T2); + const TemplateArgument &TA1 = TST1->template_arguments().back(); + if (TA1.getKind() == TemplateArgument::Pack) { + assert(TST1->template_arguments().size() == + TST2->template_arguments().size()); + const TemplateArgument &TA2 = TST2->template_arguments().back(); + assert(TA2.getKind() == TemplateArgument::Pack); + unsigned PackSize1 = TA1.pack_size(); + unsigned PackSize2 = TA2.pack_size(); + bool IsPackExpansion1 = + PackSize1 && 
TA1.pack_elements().back().isPackExpansion(); + bool IsPackExpansion2 = + PackSize2 && TA2.pack_elements().back().isPackExpansion(); + if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { + if (PackSize1 > PackSize2 && IsPackExpansion1) + return GetP2()(P1, P2); + if (PackSize1 < PackSize2 && IsPackExpansion2) + return P1; } } diff --git a/clang/test/CodeGen/partial-order-variadic.cpp b/clang/test/CodeGen/partial-order-variadic.cpp deleted file mode 100644 index a10cd6812f988..0000000000000 --- a/clang/test/CodeGen/partial-order-variadic.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fclang-abi-compat=15 -DCLANG_ABI_COMPAT=15 %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,AFTER-15 - -// CHECK: %struct.S = type { i8 } -// CHECK: @_Z2ggiRi -// CHECK: @_Z1gIiJEERiPT_DpT0_ -template int &g(T *, U...); -template void g(T); -template struct S; -template struct S {}; -void gg(int i, int &r) { - r = g(&i); - S a; -} - -// CHECK: @_Z1hIJiEEvDpPT_ -template void h(T*...) 
{} -template void h(const T&) {} -template void h(int*); - -#if !defined(CLANG_ABI_COMPAT) - -// AFTER-15: @_Z1fIiJEEvPT_DpT0_ -template void f(T*, U...){} -template void f(T){} -template void f(int*); - -template struct A; -template struct A {}; -template struct A; -template struct A; - -#endif diff --git a/clang/test/SemaCXX/pre-dr692.cpp b/clang/test/SemaCXX/pre-dr692.cpp deleted file mode 100644 index 87eac318dc067..0000000000000 --- a/clang/test/SemaCXX/pre-dr692.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %clang_cc1 %s -std=c++11 -verify -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -fclang-abi-compat=15 - -template struct A1 {}; -template struct A2 {}; -template void e1(A1); // expected-note {{candidate}} -template void e1(A1); // expected-note {{candidate}} -template void e2(A2); // expected-note {{candidate}} -template void e2(A2); // expected-note {{candidate}} -void h() { - A1 b1; - e1(b1); // expected-error{{call to 'e1' is ambiguous}} - A2 b2; - e2(b2); // expected-error{{call to 'e2' is ambiguous}} -} From 74d8628cf7cbf442f37fbc3a7012ed77e8749d3c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 2 Nov 2022 23:57:06 +0000 Subject: [PATCH 099/516] [ConstraintElimination] Skip compares with scalable vector types. Materializing scalable vectors with boolean values is not implemented yet. Skip those cases for now and leave a TODO. 
--- .../Scalar/ConstraintElimination.cpp | 7 ++++++ .../ConstraintElimination/geps-ptrvector.ll | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 375aa4e2cd440..a78bfbb54144b 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -754,6 +754,13 @@ static Constant *getScalarConstOrSplat(ConstantInt *C, Type *Ty) { static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n"); + + // TODO: Implement splat of boolean value for scalable vectors. + if (isa(Cmp->getType())) { + LLVM_DEBUG(dbgs() << " skipping due to scalable vectors\n"); + return false; + } + CmpInst::Predicate Pred = Cmp->getPredicate(); Value *A = Cmp->getOperand(0); Value *B = Cmp->getOperand(1); diff --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll index df915653e08e1..0f5a28b3c5671 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll @@ -34,3 +34,26 @@ define <2 x i1> @test.vectorgep.ult.false(<2 x ptr> %vec) { %t.1 = icmp ult <2 x ptr> %gep.1, %vec ret <2 x i1> %t.1 } + + +define @test.scalable.vectorgep.ult.true( %vec) { +; CHECK-LABEL: @test.scalable.vectorgep.ult.true( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, [[VEC:%.*]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult [[VEC]], [[GEP_1]] +; CHECK-NEXT: ret [[T_1]] +; + %gep.1 = getelementptr inbounds i32, %vec, i64 1 + %t.1 = icmp ult %vec, %gep.1 + ret %t.1 +} + +define @test.scalable.vectorgep.ult.false( %vec) { +; CHECK-LABEL: @test.scalable.vectorgep.ult.false( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, [[VEC:%.*]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult [[GEP_1]], [[VEC]] 
+; CHECK-NEXT: ret [[T_1]] +; + %gep.1 = getelementptr inbounds i32, %vec, i64 1 + %t.1 = icmp ult %gep.1, %vec + ret %t.1 +} From 78ed64d89fd6ea348a963516a2e49028e4079f65 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Thu, 3 Nov 2022 00:12:10 +0000 Subject: [PATCH 100/516] [Driver] Don't preprocess source files when reproducing linker crashes It's not necessary to redo the source file preprocessing for reproducing linker crashes because we must have successfully created the object file by this point. Skip this step, and also don't report the preprocessed source file or create the clang invocation shell script. The latter is no longer sensible without the preprocessed source, or helpful given the linker reproducer will have it's own shell script. Differential Revision: https://reviews.llvm.org/D137289 --- clang/lib/Driver/Driver.cpp | 49 +++++++++++++++++++++-------------- clang/test/Driver/lld-repro.c | 16 ++++++++---- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 80e6ec76d16f7..5704902b1cc5a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1527,6 +1527,11 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename, return false; } +static const char BugReporMsg[] = + "\n********************\n\n" + "PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n" + "Preprocessed source(s) and associated run script(s) are located at:"; + // When clang crashes, produce diagnostic information including the fully // preprocessed source file(s). Request that the developer attach the // diagnostic information to a bug report. @@ -1582,6 +1587,29 @@ void Driver::generateCompilationDiagnostics( // Suppress tool output. C.initCompilationForDiagnostics(); + // If lld failed, rerun it again with --reproduce. 
+ if (IsLLD) { + const char *TmpName = CreateTempFile(C, "linker-crash", "tar"); + Command NewLLDInvocation = Cmd; + llvm::opt::ArgStringList ArgList = NewLLDInvocation.getArguments(); + StringRef ReproduceOption = + C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() + ? "/reproduce:" + : "--reproduce="; + ArgList.push_back(Saver.save(Twine(ReproduceOption) + TmpName).data()); + NewLLDInvocation.replaceArguments(std::move(ArgList)); + + // Redirect stdout/stderr to /dev/null. + NewLLDInvocation.Execute({None, {""}, {""}}, nullptr, nullptr); + Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; + Diag(clang::diag::note_drv_command_failed_diag_msg) << TmpName; + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "\n\n********************"; + if (Report) + Report->TemporaryFiles.push_back(TmpName); + return; + } + // Construct the list of inputs. InputList Inputs; BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs); @@ -1659,22 +1687,6 @@ void Driver::generateCompilationDiagnostics( return; } - // If lld failed, rerun it again with --reproduce. - if (IsLLD) { - const char *TmpName = CreateTempFile(C, "linker-crash", "tar"); - Command NewLLDInvocation = Cmd; - llvm::opt::ArgStringList ArgList = NewLLDInvocation.getArguments(); - StringRef ReproduceOption = - C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() - ? "/reproduce:" - : "--reproduce="; - ArgList.push_back(Saver.save(Twine(ReproduceOption) + TmpName).data()); - NewLLDInvocation.replaceArguments(std::move(ArgList)); - - // Redirect stdout/stderr to /dev/null. 
- NewLLDInvocation.Execute({None, {""}, {""}}, nullptr, nullptr); - } - const ArgStringList &TempFiles = C.getTempFiles(); if (TempFiles.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) @@ -1682,10 +1694,7 @@ void Driver::generateCompilationDiagnostics( return; } - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "\n********************\n\n" - "PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n" - "Preprocessed source(s) and associated run script(s) are located at:"; + Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; SmallString<128> VFS; SmallString<128> ReproCrashFilename; diff --git a/clang/test/Driver/lld-repro.c b/clang/test/Driver/lld-repro.c index 7436d1a1f59be..1333f68d911ee 100644 --- a/clang/test/Driver/lld-repro.c +++ b/clang/test/Driver/lld-repro.c @@ -1,22 +1,28 @@ // REQUIRES: lld // UNSUPPORTED: ps4, ps5 -// RUN: not %clang %s -nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t -fcrash-diagnostics=all 2>&1 \ +// RUN: echo "-nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t" \ +// RUN: | sed -e 's/\\/\\\\/g' > %t.rsp + +// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=all 2>&1 \ +// RUN: | FileCheck %s + +// Test that the reproducer can still be created even when the input source cannot be preprocessed +// again, like when reading from stdin. 
+// RUN: not %clang -x c - @%t.rsp -fcrash-diagnostics=all 2>&1 < %s \ // RUN: | FileCheck %s // check that we still get lld's output // CHECK: error: undefined symbol: {{_?}}a // CHECK: Preprocessed source(s) and associated run script(s) are located at: -// CHECK-NEXT: note: diagnostic msg: {{.*}}lld-repro-{{.*}}.c // CHECK-NEXT: note: diagnostic msg: {{.*}}linker-crash-{{.*}}.tar -// CHECK-NEXT: note: diagnostic msg: {{.*}}lld-repro-{{.*}}.sh // CHECK-NEXT: note: diagnostic msg: // CHECK: ******************** -// RUN: not %clang %s -nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t -fcrash-diagnostics=compiler 2>&1 \ +// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=compiler 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-LINKER -// RUN: not %clang %s -nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t 2>&1 \ +// RUN: not %clang %s @%t.rsp 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-LINKER // NO-LINKER-NOT: Preprocessed source(s) and associated run script(s) are located at: From 520168024914d2e3f446959402f238862d5055b2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Nov 2022 16:13:12 -0700 Subject: [PATCH 101/516] LangRef: Fix typo in backtick placement --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3a809603ddcc4..c7cfbf12eaa48 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3648,7 +3648,7 @@ The semantics of non-zero address spaces are target-specific. Memory access through a non-dereferenceable pointer is undefined behavior in any address space. Pointers with the bit-value 0 are only assumed to be non-dereferenceable in address space 0, unless the function is -marked with the ``null_pointer_is_valid attribute``. +marked with the ``null_pointer_is_valid`` attribute. 
If an object can be proven accessible through a pointer with a different address space, the access may be modified to use that From 01ccb23bd685268db179fd4d11922a39e28d5f30 Mon Sep 17 00:00:00 2001 From: Peixin-Qiao Date: Thu, 3 Nov 2022 09:00:52 +0800 Subject: [PATCH 102/516] [flang][RFC] Add lowering design for procedure pointers This document aims to give insight into the representation of procedure pointers in FIR. Reviewed By: PeteSteinfeld, jeanPerier, kiranchandramohan Differential Revision: https://reviews.llvm.org/D136840 --- flang/docs/ProcedurePointer.md | 486 +++++++++++++++++++++++++++++++++ 1 file changed, 486 insertions(+) create mode 100644 flang/docs/ProcedurePointer.md diff --git a/flang/docs/ProcedurePointer.md b/flang/docs/ProcedurePointer.md new file mode 100644 index 0000000000000..157d387c37094 --- /dev/null +++ b/flang/docs/ProcedurePointer.md @@ -0,0 +1,486 @@ + + +# Procedure Pointer + +A procedure pointer is a procedure that has the EXTERNAL and POINTER attributes. + +This document summarizes the contexts in which procedure pointers may appear, +and how they are lowered to FIR. + +The current plan is to use/extend the `BoxedProcedure` pass for the conversion +to LLVM IR, and thus will not be lowering the procedure-pointer-related +operations to LLVM IR in `CodeGen.cpp`. + +## Fortran standard + +Here is a list of the sections and constraints of the Fortran standard involved +for procedure pointers. + +- 8.5.4 Components + - C757 + - C758 + - C759 +- 8.5.9: EXTERNAL attribute +- 8.5.14: POINTER attribute + - C853 + - A procedure pointer shall not be referenced unless it is pointer associated + with a target procedure. +- 8.5.15 PROTECTED attribute + - C855 +- 8.5.16 SAVE attribute + - (4) A procedure pointer declared in the scoping unit of a main program, + module, or submodule implicitly has the SAVE attribute. 
+- 8.10.2.1 COMMON statement + - C8119 +- 10.2.2.2 Pointer assignment statement + - C1028 + - C1029 +- 10.2.2.4 Procedure pointer assignment +- 11.1.3 ASSOCIATE construct + - C1005 +- 12.6.3 Data transfer input/output list + - C1233 +- 15.2.2.4 Procedure pointers + - A procedure pointer may be pointer associated with an external procedure, an + internal procedure, an intrinsic procedure, a module procedure, or a dummy + procedure that is not a procedure pointer. +- 15.4.3.6 Procedure declaration statement +- 15.5.2.9(5) Actual arguments associated with dummy procedure entities +- 16.9.16 ASSOCIATED(POINTER [, TARGET]) + - POINTER may be a procedure pointer, and TARGET may be proc-target in a + pointer assignment statement (10.2.2). +- 16.9.144 NULL([MOLD]) + - MOLD may be a procedure pointer. +- 18.2.3.4 C_F_PROCPOINTER(CPTR, FPTR) + - FPTR shall be a procedure pointer, and not be a component of a coindexed + object. +- C.1.1 A procedure that is not a procedure pointer can be an actual argument + that corresponds to a procedure pointer dummy argument with the INTENT(IN) + attribute. + +--- + +## Representation in FIR + +### Procedure pointer `!fir.ref<!fir.boxproc<T>>` + +A procedure pointer may have an explicit or implicit interface. T in +`!fir.ref<!fir.boxproc<T>>` is the function type, which is `() -> ()` if the +procedure pointer has an implicit interface declared as +`procedure(), pointer :: p`. + +A procedure declaration statement specifies the EXTERNAL attribute (8.5.9) for all +entities in the procedure declaration list. + +### Actual arguments associated with dummy procedure entities + +The actual argument may be a procedure pointer, a valid target for the dummy +pointer, a reference to the NULL() intrinsic, or a reference to a function that +returns a procedure pointer. + +If the interface is explicit, and the dummy argument is a procedure pointer, the +reference is resolved as the pointer to the procedure; otherwise, the reference +is resolved as the pointer target. 
+ +**Fortran case 1** +```fortran +subroutine proc_pointer_dummy_argument(p) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: p + call foo1(p) + call foo2(p) +contains + subroutine foo2(q) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: q + end subroutine foo2 +end subroutine proc_pointer_dummy_argument +``` + +**FIR for case 1** +``` +func.func private @foo1(!fir.boxproc<(!fir.ref) -> !fir.ref>) +func.func private @foo2(!fir.ref) -> !fir.ref>>) + +func.func @proc_pointer_dummy_argument(%0 : !fir.ref) -> !fir.ref>>) { + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.call @foo1(%1) : ((!fir.ref) -> !fir.ref) -> () + fir.call @foo2(%0) : (!fir.ref) -> !fir.ref>>) -> () + return +} +``` + +**Fortran case 2** +```fortran +subroutine proc_pointer_global() + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer, save :: p + call foo1(p) + call foo2(p) +contains + subroutine foo2(q) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: q + end subroutine foo2 +end subroutine proc_pointer_global +``` + +**FIR for case 2** +``` +func.func private @foo1(!fir.boxproc<(!fir.ref) -> !fir.ref>) +func.func private @foo2(!fir.ref) -> !fir.ref>>) + +fir.global internal @ProcedurePointer : !fir.boxproc<(!fir.ref) -> !fir.ref> { + %0 = fir.zero_bits (!fir.ref) -> !fir.ref + %1 = fir.emboxproc %0 : ((!fir.ref) -> !fir.ref) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + fir.has_value %1 : !fir.boxproc<(!fir.ref) -> !fir.ref> +} + +func.func @proc_pointer_global() { + %0 = fir.address_of(@ProcedurePointer) : !fir.ref) -> !fir.ref>> + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.call @foo1(%1) : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> () + fir.call @foo2(%0) : (!fir.ref) -> !fir.ref>>) -> () + return +} +``` + +**Fortran case 3** +```fortran 
+subroutine proc_pointer_local() + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: p + call foo1(p) + call foo2(p) +contains + subroutine foo2(q) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: q + end subroutine foo2 +end subroutine proc_pointer_local +``` + +**FIR for case 3** +``` +func.func private @foo1(!fir.boxproc<(!fir.ref) -> !fir.ref>) +func.func private @foo2(!fir.ref) -> !fir.ref>>) + +func.func @proc_pointer_local() { + %0 = fir.alloca !fir.boxproc<(!fir.ref) -> !fir.ref> + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + %2 = fir.box_addr %1 : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> ((!fir.ref) -> !fir.ref) + %3 = fir.zero_bits (!fir.ref) -> !fir.ref + fir.store %3 to %2 : !fir.ref<(!fir.ref) -> !fir.ref> + %4 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.call @foo1(%4) : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> () + fir.call @foo2(%0) : (!fir.ref) -> !fir.ref>>) -> () + return +} +``` + +It is possible to pass procedure pointers to a C function. If the C function has +an explicit interface in Fortran code, and the dummy argument is a procedure +pointer, the code passes a pointer to the procedure as the actual argument +(see Case 5); otherwise, the code passes the procedure pointer target as the +actual argument (see Case 4). + +**Case 4** +```c +void func_(void (*foo)(int *)) { + int *x, y = 1; + x = &y; + foo(x); +} +``` +```fortran +program main + procedure(), pointer :: pp + pp=>print_x + call func(pp) +contains + subroutine print_x(x) + integer :: x + print *, x + end +end +``` + +Note that using an internal procedure as the target is not good practice, but +it works in the implementation. It is better to use a BIND(C) external or +module procedure as the right-hand side proc-target. 
+ +**Case 5** +```c +void func_(void (**foo)(int *)) { + int *x, y = 1; + x = &y; + (*foo)(x); +} +``` +```fortran +program main + interface + subroutine func(p) + procedure(), pointer :: p + end + end interface + procedure(), pointer :: pp + pp=>print_x + call func(pp) +contains + subroutine print_x(x) + integer :: x + print *, x + end +end +``` + +Case 4 and Case 5 are not recommended, because the Fortran 2003 standard provides +the feature of interoperability with C to handle this. Specifically, +C_F_PROCPOINTER is used to associate a procedure pointer with the target of a C +function pointer. C_FUNPTR is also designed for interoperability with any C +function pointer type. + +### Procedure pointer to function returning a character type + +The dummy procedure pointer may not have a function type with an assumed length +due to C721 and C723. + +### Procedure pointer to internal procedure + +The initial plan is to implement pointers to internal procedures +using the LLVM Trampoline intrinsics. This has the drawback of requiring the +stack to be executable, which is a security hole. To avoid this, we will need +to improve the implementation to use heap-resident thunks. + +### Procedure pointer assignment `p => proc` + +The right-hand side may be a procedure, a procedure pointer, or a function whose +result is a procedure pointer. + +The procedure could be a BIND(C) procedure. The lowering of it is the same as +that of an external or module procedure. The case of an internal procedure has been +discussed above. 
+ +```c +#include +void func_(int *x) { + printf("%d\n", *x); +} +``` +```fortran +program main + interface + subroutine func(x) bind(C) + integer :: x + end + end interface + procedure(func), bind(C, name="func_") :: proc + procedure(func), pointer :: pp + integer :: x = 5 + pp=>proc + call pp(x) +end +``` + +**Fortran case** +```fortran +subroutine proc_pointer_assignment(arg0, arg1) + interface + function func(x) + integer :: x + end + end interface + procedure(func), pointer :: arg0, arg1 + real, external, bind(C, name="Procedure") :: proc + arg0=>proc ! case 1 + arg0=>arg1 ! case 2 + arg0=>reffunc ! case 3 +contains + function reffunc() result(pp) + interface + function func(x) + integer :: x + end + end interface + procedure(func), pointer :: pp + end +end +function proc(x) bind(C, name="Procedure") + integer :: x + proc = real(x) +end +``` + +**FIR** +``` +func.func @Procedure(%arg0 : !fir.ref) -> !fir.ref { + %1 = fir.load %arg0 : !fir.ref + %2 = fir.convert %1 : (i32) -> f32 + return %2 : f32 +} + +func.func @Reference2Function() -> !fir.boxproc<(!fir.ref) -> !fir.ref> { + %0 = fir.alloca !fir.boxproc<(!fir.ref) -> !fir.ref> + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + return %1 : !fir.boxproc<(!fir.ref) -> !fir.ref> +} + +func.func @proc_pointer_assignment(%arg0 : !fir.ref) -> !fir.ref>>, %arg1 : !fir.ref) -> !fir.ref>>) { + %0 = fir.alloca !fir.boxproc<(!fir.ref) -> !fir.ref> {bindc_name = ".result"} + // case 1: assignment from external procedure + %1 = fir.address_of(@Procedure) : (!fir.ref) -> !fir.ref + %2 = fir.emboxproc %1 : ((!fir.ref) -> !fir.ref) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + fir.store %2 to %arg0 : !fir.ref) -> !fir.ref>> + // case2: assignment from procdure pointer + %3 = fir.load %arg1 : !fir.ref) -> !fir.ref>> + fir.store %3 to %arg0 : !fir.ref) -> !fir.ref>> + // case3: assignment from a reference to a function whose result is a procedure pointer + %4 = fir.call @Reference2Function() : () -> !fir.boxproc<(!fir.ref) -> 
!fir.ref> + fir.store %4 to %0 : !fir.ref) -> !fir.ref>> + %5 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.store %5 to %arg0 : !fir.ref) -> !fir.ref>> + return +} +``` + +### Procedure pointer components + +Having procedure pointers in derived types permits `methods` to be dynamically +bound to objects. Such procedure pointer components will have the type +!fir.boxproc. + +**Fortran** +```fortran +subroutine proc_pointer_component(a, i, f) + interface + function func(x) + integer :: x + end + end interface + type matrix + real :: element(2,2) + procedure(func), pointer, nopass :: solve + end type + integer :: i + procedure(func) :: f + type(matrix) :: a + a%solve=>f + r = a%solve(i) +end subroutine proc_pointer_component +``` + +**FIR** +``` +func.func @proc_pointer_component(%arg0 : (!fir.ref) -> !fir.ref, %arg1: !fir.ref) { + %0 = fir.alloca !fir.type<_QFtestTmatrix{element:!fir.array<2x2xf32>,solve:!fir.boxproc<() -> ()>}> + %1 = fir.field_index solve, !fir.type<_QFtestTmatrix{element:!fir.array<2x2xf32>,solve:!fir.boxproc<() -> ()>}> + %2 = fir.coordinate_of %0, %1 : (!fir.ref,solve:!fir.boxproc<() -> ()>}>>, !fir.field) -> !fir.ref ()>> + %3 = fir.emboxproc %arg0 : ((!fir.ref) -> !fir.ref) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + %4 = fir.convert %3 : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> !fir.boxproc<() -> ()> + fir.store %4 to %2 : !fir.ref ()>> + %4 = fir.field_index solve, !fir.type<_QFtestTmatrix{element:!fir.array<2x2xf32>,solve:!fir.boxproc<() -> ()>}> + %5 = fir.coordinate_of %0, %4 : (!fir.ref,solve:!fir.boxproc<() -> ()>}>>, !fir.field) -> !fir.ref ()>> + %6 = fir.load %5 : !fir.ref ()>> + %7 = fir.convert %6 : (!fir.boxproc<() -> ()>) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + %8 = fir.box_addr %7 : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> ((!fir.ref) -> !fir.ref) + %9 = fir.call %8(%arg1) : (!fir.ref) -> !fir.ref + return +} +``` + +--- + +# Testing + +The lowering part is tested with LIT tests in tree, but the execution tests are +useful for 
full testing. + +LLVM IR testing is also helpful with the initial check. A C function pointer is +semantically equivalent to a Fortran procedure at the LLVM IR level, and a pointer +to a C function pointer is semantically equivalent to a Fortran procedure +pointer at the LLVM IR level. That is, a Fortran procedure will be converted to an +opaque pointer at the LLVM IR level, which is the same for a C function pointer; +a Fortran procedure pointer will be converted to an opaque pointer pointing to +an opaque pointer, which is the same for a pointer to a C function pointer. + +The tests should include the following: +- function result, subroutine/function arguments with varying types + - non-character scalar + - character (assumed-length and non-assumed-length) + - array (static and dynamic) + - character array + - derived type + - ... (polymorphic?) +- internal/external/module procedure or a C function as the target + - procedure pointer initialization + - procedure pointer assignment +- procedure pointer, procedure pointer target passed to a C function +- procedure pointer, procedure pointer target passed to a Fortran procedure +- procedure pointer component in derived types + +--- + +# Current TODOs +Current list of TODOs in lowering: +- `flang/lib/Lower/CallInterface.cpp:708`: not yet implemented: procedure pointer result not yet handled +- `flang/lib/Lower/CallInterface.cpp:961`: not yet implemented: procedure pointer arguments +- `flang/lib/Lower/CallInterface.cpp:993`: not yet implemented: procedure pointer results +- `flang/lib/Lower/ConvertExpr.cpp:1119`: not yet implemented: procedure pointer component in derived type assignment +- `flang/lib/Lower/ConvertType.cpp:228`: not yet implemented: procedure pointers +- `flang/lib/Lower/Bridge.cpp:2438`: not yet implemented: procedure pointer assignment +- `flang/lib/Lower/ConvertVariable.cpp:348`: not yet implemented: procedure pointer component default initialization +- `flang/lib/Lower/ConvertVariable.cpp:416`: not yet 
implemented: procedure pointer globals +- `flang/lib/Lower/ConvertVariable.cpp:1459`: not yet implemented: procedure pointers +- `flang/lib/Lower/HostAssociations.cpp:162`: not yet implemented: capture procedure pointer in internal procedure +- lowering of procedure pointers in ASSOCIATED, NULL, and C_F_PROCPOINTER + +Current list of TODOs in code generation: + +NOTE: There are any number of possible implementations. + +- `flang/lib/Optimizer/CodeGen/TypeConverter.h:64` TODO: BoxProcType type conversion +- `flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp:136` not yet implemented: record type with a boxproc type +- fir.global for procedure pointers + +or + +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:2080` not yet implemented: fir.emboxproc codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:629` not yet implemented: fir.boxproc_host codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:1078` not yet implemented: fir.len_param_index codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:3166` not yet implemented: fir.unboxproc codegen + +--- + +Resources: +- [1] Fortran standard From 1ada819c237bf724e6eaa1c82b2742e3eb57a5d5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 2 Nov 2022 19:21:33 -0700 Subject: [PATCH 103/516] [asan] Default to -fsanitize-address-use-odr-indicator for non-Windows This enables odr indicators on all platforms and private aliases on non-Windows. Note that GCC also uses private aliases: this fixes bogus `The following global variable is not properly aligned.` errors for interposed global variables Fix https://github.com/google/sanitizers/issues/398 Fix https://github.com/google/sanitizers/issues/1017 Fix https://github.com/llvm/llvm-project/issues/36893 (we can restore D46665) Global variables of non-hasExactDefinition() linkages (i.e. linkonce/linkonce_odr/weak/weak_odr/common/external_weak) are not instrumented. If an instrumented variable gets interposed to an uninstrumented variable due to symbol interposition (e.g. 
in issue 36893, _ZTS1A in foo.so is resolved to _ZTS1A in the executable), there may be a bogus error. With private aliases, the register code will not resolve to a definition in another module, and thus prevent the issue. Cons: minor size increase. This is mainly due to extra `__odr_asan_gen_*` symbols. (ELF) In addition, in relocatable files private aliases replace some relocations referencing global symbols with .L symbols and may introduce some STT_SECTION symbols. For lld, with -g0, the size increase is 0.07~0.09% for many configurations I have tested: -O0, -O1, -O2, -O3, -O2 -ffunction-sections -fdata-sections -Wl,--gc-sections. With -g1 or above, the size increase ratio will be even smaller. This patch obsoletes D92078. Don't migrate Windows for now: the static data member of a specialization `std::num_put::id` is a weak symbol, as well as its ODR indicator. Unfortunately, link.exe (and lld without -lldmingw) generally doesn't support duplicate weak definitions (weak symbols in different TUs likely pick different defined external symbols and conflict). 
Differential Revision: https://reviews.llvm.org/D137227 --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/SanitizerArgs.cpp | 10 +++++++--- clang/test/CodeGen/asan-globals-odr.cpp | 7 +++---- clang/test/CodeGen/asan-static-odr.cpp | 4 ++-- clang/test/Driver/fsanitize.c | 19 +++++++------------ .../asan/TestCases/Linux/odr-violation.cpp | 17 ++++++++++------- .../asan/TestCases/Linux/odr_indicators.cpp | 2 +- .../Instrumentation/AddressSanitizer.h | 2 +- .../Instrumentation/AddressSanitizer.cpp | 14 +++++++++----- .../AddressSanitizer/global_metadata.ll | 4 ++-- .../AddressSanitizer/global_with_comdat.ll | 8 ++++---- .../AddressSanitizer/local_alias.ll | 2 +- .../AddressSanitizer/odr-check-ignore.ll | 4 ++-- .../AddressSanitizer/win-string-literal.ll | 2 +- 14 files changed, 51 insertions(+), 46 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b2f334f9f8144..3e15c55ed52ea 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1758,7 +1758,7 @@ defm sanitize_address_globals_dead_stripping : BoolOption<"f", "sanitize-address NegFlag>, Group; defm sanitize_address_use_odr_indicator : BoolOption<"f", "sanitize-address-use-odr-indicator", - CodeGenOpts<"SanitizeAddressUseOdrIndicator">, DefaultFalse, + CodeGenOpts<"SanitizeAddressUseOdrIndicator">, DefaultTrue, PosFlag, NegFlag>, diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index edbb7625aaf2f..a710081fa2d47 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -923,10 +923,14 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, !TC.getTriple().isOSBinFormatELF() || TC.getTriple().isOSFuchsia() || TC.getTriple().isPS()); + // Enable ODR indicators which allow better handling of mixed instrumented + // and uninstrumented globals. 
Disable them for Windows where weak odr + // indicators (.weak.__odr_asan_gen*) may cause multiple definition linker + // errors in the absence of -lldmingw. AsanUseOdrIndicator = Args.hasFlag(options::OPT_fsanitize_address_use_odr_indicator, options::OPT_fno_sanitize_address_use_odr_indicator, - AsanUseOdrIndicator); + !TC.getTriple().isOSWindows()); if (AllAddedKinds & SanitizerKind::PointerCompare & ~AllRemove) { AsanInvalidPointerCmp = true; @@ -1236,8 +1240,8 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (AsanGlobalsDeadStripping) CmdArgs.push_back("-fsanitize-address-globals-dead-stripping"); - if (AsanUseOdrIndicator) - CmdArgs.push_back("-fsanitize-address-use-odr-indicator"); + if (!AsanUseOdrIndicator) + CmdArgs.push_back("-fno-sanitize-address-use-odr-indicator"); if (AsanInvalidPointerCmp) { CmdArgs.push_back("-mllvm"); diff --git a/clang/test/CodeGen/asan-globals-odr.cpp b/clang/test/CodeGen/asan-globals-odr.cpp index d6b5ed7377fe5..e8fcc81516a57 100644 --- a/clang/test/CodeGen/asan-globals-odr.cpp +++ b/clang/test/CodeGen/asan-globals-odr.cpp @@ -1,12 +1,11 @@ -// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR0,GLOB_VAR,ALIAS0 -// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_ALIAS_INDICATOR,ALIAS1 +// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_ALIAS_INDICATOR,ALIAS1 // RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR0,GLOB_VAR,ALIAS0 // RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s 
--check-prefixes=CHECK,INDICATOR1,GLOB_ALIAS_INDICATOR,ALIAS1 // RUN: %clang_cc1 -fsanitize=address -fsanitize-address-use-odr-indicator -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR0,GLOB_VAR,ALIAS0 // No alias on Windows but indicators should work. -// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,GLOB_VAR,ALIAS0 -// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_VAR_INDICATOR,ALIAS0 +// RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,GLOB_VAR,ALIAS0 +// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_VAR_INDICATOR,ALIAS0 int global; diff --git a/clang/test/CodeGen/asan-static-odr.cpp b/clang/test/CodeGen/asan-static-odr.cpp index a4ba1112db437..bd918243299c1 100644 --- a/clang/test/CodeGen/asan-static-odr.cpp +++ b/clang/test/CodeGen/asan-static-odr.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s +// RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s // No alias on Windows but indicators should work. 
-// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s +// RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s static int global; diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 1756a5dbfe798..d1254a2e47029 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -263,23 +263,18 @@ // RUN: FileCheck %s --check-prefix=CHECK-NO-CHECK-ASAN-CALLBACK // CHECK-NO-CHECK-ASAN-CALLBACK-NOT: "-mllvm" "-asan-instrumentation-with-call-threshold=0" -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR -// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fsanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR -// CHECK-ASAN-ODR-INDICATOR: -cc1{{.*}}-fsanitize-address-use-odr-indicator +// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR +// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF +// CHECK-ASAN-ODR-INDICATOR-NOT: "-fsanitize-address-use-odr-indicator" // RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fno-sanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF // RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fno-sanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF -// CHECK-ASAN-ODR-INDICATOR-OFF-NOT: -cc1{{.*}}address-generate-odr-globals +// CHECK-ASAN-ODR-INDICATOR-OFF: "-cc1" {{.*}} "-fno-sanitize-address-use-odr-indicator" -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fno-sanitize-address-use-odr-indicator 
-fsanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-BOTH -// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-BOTH -// CHECK-ASAN-ODR-INDICATOR-BOTH: -cc1{{.*}}-fsanitize-address-use-odr-indicator +// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR +// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-odr-indicator -fno-sanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-BOTH-OFF -// CHECK-ASAN-ODR-INDICATOR-BOTH-OFF-NOT: -cc1{{.*}}address-generate-odr-globals - -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-WITHOUT-ODR-INDICATOR -// CHECK-ASAN-WITHOUT-ODR-INDICATOR-NOT: -cc1{{.*}}address-generate-odr-globals +// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-odr-indicator -fno-sanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF // RUN: %clang --target=x86_64-linux-gnu -fsanitize-memory-track-origins -pie %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ONLY-TRACK-ORIGINS // CHECK-ONLY-TRACK-ORIGINS: warning: argument unused during compilation: '-fsanitize-memory-track-origins' diff --git a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp index 1c8ea99430df2..a86e032ecef6f 100644 --- 
a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp @@ -6,16 +6,19 @@ // We use fast_unwind_on_malloc=0 to have full unwinding even w/o frame // pointers. This setting is not on by default because it's too expensive. // +// Note, -asan-use-private-alias=1 -asan-use-odr-indicator=1 is the default. +// -fno-sanitize-address-use-odr-indicator turns off both. +// // Different size: detect a bug if detect_odr_violation>=1 -// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -// RUN: %clangxx_asan -g %s %ld_flags_rpath_exe -o %t-ODR-EXE +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -fno-sanitize-address-use-odr-indicator %s -o %dynamiclib +// RUN: %clangxx_asan -g -fno-sanitize-address-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=1 not %run %t-ODR-EXE 2>&1 | FileCheck %s // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=0 %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s // // Same size: report a bug only if detect_odr_violation>=2. -// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -DSZ=100 +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -fno-sanitize-address-use-odr-indicator %s -o %dynamiclib -DSZ=100 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=1 %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s @@ -26,13 +29,13 @@ // RUN: rm -f %t.supp // // Use private aliases for global variables without indicator symbol. 
-// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias %s -o %dynamiclib -DSZ=100 -// RUN: %clangxx_asan -g -mllvm -asan-use-private-alias %s %ld_flags_rpath_exe -o %t-ODR-EXE +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-odr-indicator=0 %s -o %dynamiclib -DSZ=100 +// RUN: %clangxx_asan -g -mllvm -asan-use-odr-indicator=0 %s %ld_flags_rpath_exe -o %t-ODR-EXE // RUN: %env_asan_opts=fast_unwind_on_malloc=0 %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED // Use private aliases for global variables: use indicator symbol to detect ODR violation. -// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator %s -o %dynamiclib -DSZ=100 -// RUN: %clangxx_asan -g -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -DSZ=100 +// RUN: %clangxx_asan -g %s %ld_flags_rpath_exe -o %t-ODR-EXE // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s // Same as above but with clang switches. 
diff --git a/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp b/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp index 36176b552906d..583f6e662fda8 100644 --- a/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx_asan -fPIC %s -o %t +// RUN: %clangxx_asan -fno-sanitize-address-use-odr-indicator -fPIC %s -o %t // RUN: %env_asan_opts=report_globals=2 %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,INDICATOR0 // RUN: %clangxx_asan -fsanitize-address-use-odr-indicator -fPIC %s -o %t diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index 7558568b0c1e9..41ca345885083 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -36,7 +36,7 @@ struct AddressSanitizerOptions { class AddressSanitizerPass : public PassInfoMixin { public: AddressSanitizerPass(const AddressSanitizerOptions &Options, - bool UseGlobalGC = true, bool UseOdrIndicator = false, + bool UseGlobalGC = true, bool UseOdrIndicator = true, AsanDtorKind DestructorKind = AsanDtorKind::Global); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); void printPipeline(raw_ostream &OS, diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8cbe29a4c4ef2..921be7abfdba6 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -396,12 +396,12 @@ static cl::opt ClForceExperiment( static cl::opt ClUsePrivateAlias("asan-use-private-alias", cl::desc("Use private aliases for global variables"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static cl::opt ClUseOdrIndicator("asan-use-odr-indicator", cl::desc("Use odr indicators to improve ODR 
reporting"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static cl::opt ClUseGlobalsGC("asan-globals-live-support", @@ -767,15 +767,19 @@ class ModuleAddressSanitizer { public: ModuleAddressSanitizer(Module &M, bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true, - bool UseOdrIndicator = false, + bool UseOdrIndicator = true, AsanDtorKind DestructorKind = AsanDtorKind::Global) : CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel), Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel), // Enable aliases as they should have no downside with ODR indicators. - UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias), - UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator), + UsePrivateAlias(ClUsePrivateAlias.getNumOccurrences() > 0 + ? ClUsePrivateAlias + : UseOdrIndicator), + UseOdrIndicator(ClUseOdrIndicator.getNumOccurrences() > 0 + ? ClUseOdrIndicator + : UseOdrIndicator), // Not a typo: ClWithComdat is almost completely pointless without // ClUseGlobalsGC (because then it only works on modules without // globals, which are rare); it is a prerequisite for ClUseGlobalsGC; diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll index 068c079151dda..47df1d43549ce 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll @@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" ; Check that globals were instrumented: -; CHECK: @global = global { i32, [28 x i8] } zeroinitializer, align 32 -; CHECK: @.str = internal constant { [14 x i8], [18 x i8] } { [14 x i8] c"Hello, world!\00", [18 x i8] zeroinitializer }, align 32 +; CHECK: @global = global { i32, [28 x i8] } zeroinitializer, comdat, align 32 +; CHECK: @.str = internal constant { [14 x i8], [18 x 
i8] } { [14 x i8] c"Hello, world!\00", [18 x i8] zeroinitializer }, comdat({{.*}}), align 32 ; Check emitted location descriptions: ; CHECK: [[VARNAME:@___asan_gen_.[0-9]+]] = private unnamed_addr constant [7 x i8] c"global\00", align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll b/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll index 5831b81370680..47bb1f102e2fc 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll @@ -5,9 +5,9 @@ ; enabled as indicator symbols will cause link time odr violations. ; This is to fix PR 47925. ; -; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -S | FileCheck %s --check-prefixes=CHECK,NOCOMDAT +; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -asan-use-odr-indicator=0 -S | FileCheck %s --check-prefixes=CHECK,NOCOMDAT ; Check that enabling odr indicators enables comdat for globals. -; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK,COMDAT +; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -S | FileCheck %s --check-prefixes=CHECK,COMDAT target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -43,8 +43,8 @@ target triple = "x86_64-unknown-linux-gnu" ; Check emitted location descriptions: ; CHECK: [[VARNAME:@___asan_gen_.[0-9]+]] = private unnamed_addr constant [7 x i8] c"global\00", align 1 -; COMDAT: @__asan_global_global = {{.*}}i64 ptrtoint (ptr @global to i64){{.*}} section "asan_globals"{{.*}}, !associated -; COMDAT: @__asan_global_.str = {{.*}}i64 ptrtoint (ptr @{{.str|1}} to i64){{.*}} section "asan_globals"{{.*}}, !associated +; COMDAT: @__asan_global_global = {{.*}}i64 ptrtoint (ptr @__odr_asan_gen_global to i64){{.*}} section "asan_globals"{{.*}}, comdat($global), !associated +; COMDAT: @__asan_global_.str = {{.*}}i64 ptrtoint (ptr 
@___asan_gen_ to i64){{.*}} section "asan_globals"{{.*}}, comdat($.str.{{.*}}), !associated ; The metadata has to be inserted to llvm.compiler.used to avoid being stripped ; during LTO. diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll index 88525ae98a6a3..b772d5c76167c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll @@ -1,5 +1,5 @@ ; Defaults -; RUN: opt < %s -passes=asan -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -passes=asan -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-INDICATOR ; {newPM,legacyPM} x {alias0,alias1} x {odr0,odr1} ; RUN: opt < %s -passes=asan -asan-use-private-alias=0 -asan-use-odr-indicator=0 -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR diff --git a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll index ecd518b3d312d..3160802b1e09e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -passes=asan -asan-use-private-alias=0 -S | FileCheck %s --check-prefix=NOALIAS -; RUN: opt < %s -passes=asan -asan-use-private-alias=1 -S | FileCheck %s --check-prefix=ALIAS +; RUN: opt < %s -passes=asan -asan-use-odr-indicator=0 -asan-use-private-alias=0 -S | FileCheck %s --check-prefix=NOALIAS +; RUN: opt < %s -passes=asan -asan-use-odr-indicator=0 -asan-use-private-alias=1 -S | FileCheck %s --check-prefix=ALIAS target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll index 14c2aa891e958..6d1eec29796c5 100644 --- 
a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=asan -S | FileCheck %s +; RUN: opt < %s -passes=asan -asan-use-odr-indicator=0 -asan-use-private-alias=0 -S | FileCheck %s ; Generated like so: ; $ clang -S -emit-llvm -Xclang -disable-llvm-passes -fsanitize=address -O1 t.cpp -o t.ll From d16b5c350473f04aae4f39344242e140b0dfd79d Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Thu, 3 Nov 2022 11:03:37 +0800 Subject: [PATCH 104/516] [asan] Use proper shadow offset for loongarch64 in instrumentation passes Instrumentation passes now use the proper shadow offset. There will be many asan test failures without this patch. For example: ``` $ ./lib/asan/tests/LOONGARCH64LinuxConfig/Asan-loongarch64-calls-Test AddressSanitizer:DEADLYSIGNAL ================================================================= ==651209==ERROR: AddressSanitizer: SEGV on unknown address 0x1ffffe2dfa9b (pc 0x5555585e151c bp 0x7ffffb9ec070 sp 0x7ffffb9ebfd0 T0) ==651209==The signal is caused by a UNKNOWN memory access. 
``` Before the patch: ``` $ make check-asan Testing Time: 36.13s Unsupported : 205 Passed : 83 Expectedly Failed: 1 Failed : 239 ``` After the patch: ``` $ make check-asan Testing Time: 58.98s Unsupported : 205 Passed : 421 Expectedly Failed: 1 Failed : 89 ``` Differential Revision: https://reviews.llvm.org/D137013 --- .../Transforms/Instrumentation/AddressSanitizer.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 921be7abfdba6..ff05454aa920e 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -106,6 +106,7 @@ static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; +static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 46; static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; @@ -484,6 +485,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, bool IsMIPS64 = TargetTriple.isMIPS64(); bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsLoongArch64 = TargetTriple.getArch() == Triple::loongarch64; bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; bool IsWindows = TargetTriple.isOSWindows(); bool IsFuchsia = TargetTriple.isOSFuchsia(); @@ -555,6 +557,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, Mapping.Offset = kDynamicShadowSentinel; else if (IsAArch64) Mapping.Offset = kAArch64_ShadowOffset64; + else if (IsLoongArch64) + Mapping.Offset = 
kLoongArch64_ShadowOffset64; else if (IsRISCV64) Mapping.Offset = kRISCV64_ShadowOffset64; else if (IsAMDGPU) @@ -573,12 +577,12 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, } // OR-ing shadow offset if more efficient (at least on x86) if the offset - // is a power of two, but on ppc64 we have to use add since the shadow - // offset is not necessary 1/8-th of the address space. On SystemZ, - // we could OR the constant in a single instruction, but it's more + // is a power of two, but on ppc64 and loongarch64 we have to use add since + // the shadow offset is not necessarily 1/8-th of the address space. On + // SystemZ, we could OR the constant in a single instruction, but it's more // efficient to load it once and use indexed addressing. Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS && - !IsRISCV64 && + !IsRISCV64 && !IsLoongArch64 && !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; bool IsAndroidWithIfuncSupport = From 061426df572552bc839e1a80cb29070d4242a32f Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Thu, 3 Nov 2022 11:07:53 +0800 Subject: [PATCH 105/516] [sanitizer] Fix vfork interception on loongarch64 Fix a brown paper bag error made by me in D129418. I didn't set ASAN_INTERCEPT_VFORK correctly for loongarch64, but created an all-zero object for __interception::real_vfork. This caused anything calling vfork() to die instantly. Fix this issue by setting ASAN_INTERCEPT_VFORK and remove the bad all-zero definition. Other ports have an all-zero common definition but we don't need it at least for now. And, enable ASAN vfork test for loongarch64 to prevent regression in the future. 
Differential Revision: https://reviews.llvm.org/D137160 --- compiler-rt/lib/asan/asan_interceptors.h | 2 +- .../sanitizer_common_interceptors_vfork_loongarch64.inc.S | 6 ------ compiler-rt/test/asan/TestCases/Linux/vfork.cpp | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h index 35727a96497dc..c4bf087ea17f0 100644 --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -114,7 +114,7 @@ void InitializePlatformInterceptors(); #if SANITIZER_LINUX && \ (defined(__arm__) || defined(__aarch64__) || defined(__i386__) || \ - defined(__x86_64__) || SANITIZER_RISCV64) + defined(__x86_64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) # define ASAN_INTERCEPT_VFORK 1 #else # define ASAN_INTERCEPT_VFORK 0 diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S index 05192485d5971..68782acb379d1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S @@ -5,12 +5,6 @@ ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA) ASM_HIDDEN(_ZN14__interception10real_vforkE) -.bss -.type _ZN14__interception10real_vforkE, @object -.size _ZN14__interception10real_vforkE, 8 -_ZN14__interception10real_vforkE: - .zero 8 - .text .globl ASM_WRAPPER_NAME(vfork) ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork)) diff --git a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp index 4c0f02c5088e4..b943e4debce2c 100644 --- a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp @@ -1,7 +1,7 @@ // https://github.com/google/sanitizers/issues/925 // RUN: %clang_asan -O0 %s -o %t && %run %t 2>&1 -// REQUIRES: 
aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch || riscv64-target-arch +// REQUIRES: aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch || riscv64-target-arch || loongarch64-target-arch #include #include From 1024497fffd18bba92485852cfedebb829499953 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Thu, 3 Nov 2022 10:39:31 +0800 Subject: [PATCH 106/516] [X86] Remove `IntrArgMemOnly` from cmpccxadd intrinsics CMPSXADD will modify memory, so we can't use `IntrArgMemOnly` here. Found it during review D137250. --- llvm/include/llvm/IR/IntrinsicsX86.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index f6cc72928fd26..4a836e9e917c4 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5364,13 +5364,13 @@ def int_x86_cmpccxadd32 : ClangBuiltin<"__builtin_ia32_cmpccxadd32">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg>]>; + [ImmArg>]>; def int_x86_cmpccxadd64 : ClangBuiltin<"__builtin_ia32_cmpccxadd64">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg>]>; + [ImmArg>]>; } //===----------------------------------------------------------------------===// let TargetPrefix = "x86" in { From 5adb090795914a4122492117f3af8dacaf69301f Mon Sep 17 00:00:00 2001 From: wanglei Date: Thu, 3 Nov 2022 11:10:41 +0800 Subject: [PATCH 107/516] [LoongArch] Fix codegen for [su]itofp instructions This patch fixes codegen for `[su]itofp` instructions. In LoongArch, a legal int-to-float conversion is done in two steps: 1. Move the data from `GPR` to `FPR`. (FRLen >= GRLen) 2. Conversion in `FPR`. 
(the data in `FPR` is treated as a signed value) Based on the above features, when the type's BitWidth meets the requirements, all `SINT_TO_FP` are legal, all `UINT_TO_FP` are expanded and lowered to a libcall when appropriate. The only special case is LoongArch64 with `+f,-d` features. At this point, custom processing is required for `[SU]INT_TO_FP`. Of course, we can also ignore it and use a libcall directly. Differential Revision: https://reviews.llvm.org/D136916 --- .../LoongArch/LoongArchFloat32InstrInfo.td | 6 +- .../LoongArch/LoongArchFloat64InstrInfo.td | 4 +- .../LoongArch/LoongArchISelDAGToDAG.cpp | 6 ++ .../LoongArch/LoongArchISelLowering.cpp | 72 ++++++++++++++++--- .../Target/LoongArch/LoongArchISelLowering.h | 1 + .../ir-instruction/double-convert.ll | 13 +--- .../LoongArch/ir-instruction/float-convert.ll | 66 ++++++----------- 7 files changed, 100 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 0e2f57551f334..3767fc1d793ba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -284,7 +284,11 @@ def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>; def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src), (MOVFR2GR_S FPR32:$src)>; // int -> f32 -def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +def : Pat<(f32 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +// uint -> f32 +def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_S_W (MOVGR2FR_W GPR:$src))>; } // Predicates = [HasBasicF, IsLA64] // FP Rounding diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index d061b6426e244..44a80054f0a90 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++
b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -288,13 +288,11 @@ def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; /// Convert int to FP let Predicates = [HasBasicD, IsLA64] in { +def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_L (MOVGR2FR_D GPR:$src))>; def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; -def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))), - (FFINT_D_W (MOVGR2FR_W GPR:$src))>; - def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; } // Predicates = [HasBasicD, IsLA64] let Predicates = [HasBasicD, IsLA32] in { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index 415ca4d871cda..8ba1f9c1b27f6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -213,6 +213,12 @@ bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { Val = N.getOperand(0); return true; } + if (N.getOpcode() == LoongArchISD::BSTRPICK && + N.getConstantOperandVal(1) < UINT64_C(0X1F) && + N.getConstantOperandVal(2) == UINT64_C(0)) { + Val = N; + return true; + } MVT VT = N.getSimpleValueType(); if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { Val = N; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 6f477413636a9..49e8ce02abccd 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -21,6 +21,7 @@ #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsLoongArch.h" #include "llvm/Support/Debug.h" @@ -160,7 +161,12 @@ 
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::MUL_I128, nullptr); setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); - setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); + if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD())) { + setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + } // Compute derived properties from the register classes. computeRegisterProperties(STI.getRegisterInfo()); @@ -220,6 +226,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerBITCAST(Op, DAG); case ISD::UINT_TO_FP: return lowerUINT_TO_FP(Op, DAG); + case ISD::SINT_TO_FP: + return lowerSINT_TO_FP(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::FRAMEADDR: @@ -302,19 +310,61 @@ SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD() && "unexpected target features"); + + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + if (Op0->getOpcode() == ISD::AND) { + auto *C = dyn_cast(Op0.getOperand(1)); + if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) + return Op; + } + + if (Op0->getOpcode() == LoongArchISD::BSTRPICK && + Op0.getConstantOperandVal(1) < UINT64_C(0X1F) && + Op0.getConstantOperandVal(2) == UINT64_C(0)) + return Op; + + if (Op0.getOpcode() == ISD::AssertZext && + dyn_cast(Op0.getOperand(1))->getVT().bitsLT(MVT::i32)) + return Op; + + EVT OpVT = Op0.getValueType(); + EVT RetVT = Op.getValueType(); + RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); + MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); + SDValue Chain = SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, 
DL, Chain); + return Result; +} + +SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD() && "unexpected target features"); SDLoc DL(Op); - auto &TLI = DAG.getTargetLoweringInfo(); - SDValue Tmp1, Tmp2; - SDValue Op1 = Op.getOperand(0); - if (Op1->getOpcode() == ISD::AssertZext || - Op1->getOpcode() == ISD::AssertSext) + SDValue Op0 = Op.getOperand(0); + + if ((Op0.getOpcode() == ISD::AssertSext || + Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) && + dyn_cast(Op0.getOperand(1))->getVT().bitsLE(MVT::i32)) return Op; - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0)); - SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc); - SDNode *N = Res.getNode(); - TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG); - return Tmp1; + + EVT OpVT = Op0.getValueType(); + EVT RetVT = Op.getValueType(); + RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); + MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); + SDValue Chain = SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); + return Result; } SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 1e411fb34f727..358da7feb20b6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -171,6 +171,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_WO_CHAIN(SDValue 
Op, SelectionDAG &DAG) const; SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll index 070027b698436..03a126a736efc 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll @@ -242,16 +242,9 @@ define double @convert_u32_to_double(i32 %a) nounwind { ; ; LA64-LABEL: convert_u32_to_double: ; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) -; LA64-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) -; LA64-NEXT: fld.d $fa0, $a1, 0 -; LA64-NEXT: lu52i.d $a1, $zero, 1107 -; LA64-NEXT: movgr2fr.d $fa1, $a1 -; LA64-NEXT: fsub.d $fa0, $fa1, $fa0 -; LA64-NEXT: lu12i.w $a1, 275200 -; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 -; LA64-NEXT: movgr2fr.d $fa1, $a0 -; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ffint.d.l $fa0, $fa0 ; LA64-NEXT: ret %1 = uitofp i32 %a to double ret double %1 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll index 559206a4db5ac..a18cb6f756a60 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll @@ -376,14 +376,12 @@ define float @convert_i32_to_float(i32 %a) nounwind { ; ; LA64F-LABEL: convert_i32_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.w $a0, $a0, 0 ; LA64F-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-NEXT: ffint.s.w $fa0, $fa0 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i32_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.w $a0, $a0, 0 ; LA64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-NEXT: ffint.s.w $fa0, $fa0 ; LA64D-NEXT: ret @@ -412,14 +410,17 @@ define float @convert_i64_to_float(i64 %a) nounwind { ; ; LA64F-LABEL: convert_i64_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: 
movgr2fr.w $fa0, $a0 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(__floatdisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i64_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: movgr2fr.w $fa0, $a0 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 ; LA64D-NEXT: ret %1 = sitofp i64 %a to float ret float %1 @@ -514,34 +515,19 @@ define float @convert_u32_to_float(i32 %a) nounwind { ; ; LA64F-LABEL: convert_u32_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: bstrpick.d $a1, $a0, 31, 1 -; LA64F-NEXT: andi $a2, $a0, 1 -; LA64F-NEXT: or $a1, $a2, $a1 -; LA64F-NEXT: movgr2fr.w $fa0, $a1 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; LA64F-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64F-NEXT: slti $a1, $a0, 0 -; LA64F-NEXT: movgr2cf $fcc0, $a1 -; LA64F-NEXT: movgr2fr.w $fa1, $a0 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 -; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +; LA64F-NEXT: bl %plt(__floatundisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u32_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: bstrpick.d $a1, $a0, 31, 1 -; LA64D-NEXT: andi $a2, $a0, 1 -; LA64D-NEXT: or $a1, $a2, $a1 -; LA64D-NEXT: movgr2fr.w $fa0, $a1 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 ; LA64D-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64D-NEXT: slti $a1, $a0, 0 -; LA64D-NEXT: movgr2cf $fcc0, $a1 -; LA64D-NEXT: movgr2fr.w $fa1, $a0 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 -; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 ; LA64D-NEXT: ret %1 = uitofp i32 %a to float ret float %1 @@ 
-568,17 +554,11 @@ define float @convert_u64_to_float(i64 %a) nounwind { ; ; LA64F-LABEL: convert_u64_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: srli.d $a1, $a0, 1 -; LA64F-NEXT: andi $a2, $a0, 1 -; LA64F-NEXT: or $a1, $a2, $a1 -; LA64F-NEXT: movgr2fr.w $fa0, $a1 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 -; LA64F-NEXT: slti $a1, $a0, 0 -; LA64F-NEXT: movgr2cf $fcc0, $a1 -; LA64F-NEXT: movgr2fr.w $fa1, $a0 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 -; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(__floatundisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u64_to_float: @@ -586,13 +566,13 @@ define float @convert_u64_to_float(i64 %a) nounwind { ; LA64D-NEXT: srli.d $a1, $a0, 1 ; LA64D-NEXT: andi $a2, $a0, 1 ; LA64D-NEXT: or $a1, $a2, $a1 -; LA64D-NEXT: movgr2fr.w $fa0, $a1 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 +; LA64D-NEXT: movgr2fr.d $fa0, $a1 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 ; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 ; LA64D-NEXT: slti $a1, $a0, 0 ; LA64D-NEXT: movgr2cf $fcc0, $a1 -; LA64D-NEXT: movgr2fr.w $fa1, $a0 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.s.l $fa1, $fa1 ; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 ; LA64D-NEXT: ret %1 = uitofp i64 %a to float From 8cdee2eadabfaada866a69ed7d0eda96b9567465 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Tue, 1 Nov 2022 15:12:43 -0700 Subject: [PATCH 108/516] [NFC][flang] Lowering options clean-up. This change-set defines the LoweringOptions the same way other options are defined in Flang. 
Differential Revision: https://reviews.llvm.org/D137207 --- flang/include/flang/Lower/LoweringOptions.def | 35 ++++++++++++++ flang/include/flang/Lower/LoweringOptions.h | 48 ++++++++----------- flang/lib/Lower/CMakeLists.txt | 1 + flang/lib/Lower/CallInterface.cpp | 2 +- flang/lib/Lower/ConvertType.cpp | 3 +- flang/lib/Lower/LoweringOptions.cpp | 23 +++++++++ flang/unittests/Frontend/CMakeLists.txt | 1 + 7 files changed, 82 insertions(+), 31 deletions(-) create mode 100644 flang/include/flang/Lower/LoweringOptions.def create mode 100644 flang/lib/Lower/LoweringOptions.cpp diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def new file mode 100644 index 0000000000000..2a89308467fd9 --- /dev/null +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -0,0 +1,35 @@ +//===--- LoweringOptions.def - Lowering options database ---------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the lowering options. Users of this file must define +/// LOWERINGOPT macro to make use of this information. +/// +//===----------------------------------------------------------------------===// + +#ifndef LOWERINGOPT +# error Define the LOWERINGOPT macro to handle lowering options +#endif + +#ifndef ENUM_LOWERINGOPT +# define ENUM_LOWERINGOPT(Name, Type, Bits, Default) \ +LOWERINGOPT(Name, Bits, Default) +#endif + +/// If true, lower transpose without a runtime call. +ENUM_LOWERINGOPT(OptimizeTranspose, unsigned, 1, 1) + +/// If true, enable polymorphic type lowering feature. Off by default. +ENUM_LOWERINGOPT(PolymorphicTypeImpl, unsigned, 1, 0) + +/// If true, lower to High level FIR before lowering to FIR. +/// Off by default until fully ready. 
+ENUM_LOWERINGOPT(LowerToHighLevelFIR, unsigned, 1, 0) + +#undef LOWERINGOPT +#undef ENUM_LOWERINGOPT diff --git a/flang/include/flang/Lower/LoweringOptions.h b/flang/include/flang/Lower/LoweringOptions.h index d882ff0fb233a..dd297e41bded2 100644 --- a/flang/include/flang/Lower/LoweringOptions.h +++ b/flang/include/flang/Lower/LoweringOptions.h @@ -17,39 +17,31 @@ namespace Fortran::lower { -class LoweringOptions { - /// If true, lower transpose without a runtime call. - unsigned optimizeTranspose : 1; - - /// If true, enable polymorphic type lowering feature. Off by default. - unsigned polymorphicTypeImpl : 1; +class LoweringOptionsBase { +public: +#define LOWERINGOPT(Name, Bits, Default) unsigned Name : Bits; +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) +#include "flang/Lower/LoweringOptions.def" + +protected: +#define LOWERINGOPT(Name, Bits, Default) +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) unsigned Name : Bits; +#include "flang/Lower/LoweringOptions.def" +}; - /// If true, lower to High level FIR before lowering to FIR. - /// Off by default until fully ready. 
- unsigned lowerToHighLevelFIR : 1; +class LoweringOptions : public LoweringOptionsBase { public: - LoweringOptions() - : optimizeTranspose(true), polymorphicTypeImpl(false), - lowerToHighLevelFIR(false) {} - - bool getOptimizeTranspose() const { return optimizeTranspose; } - LoweringOptions &setOptimizeTranspose(bool v) { - optimizeTranspose = v; - return *this; +#define LOWERINGOPT(Name, Bits, Default) +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ + LoweringOptions &set##Name(Type Value) { \ + Name = static_cast(Value); \ + return *this; \ } +#include "flang/Lower/LoweringOptions.def" - bool isPolymorphicTypeImplEnabled() const { return polymorphicTypeImpl; } - LoweringOptions &setPolymorphicTypeImpl(bool v) { - polymorphicTypeImpl = v; - return *this; - } - - bool getLowerToHighLevelFIR() const { return lowerToHighLevelFIR; } - LoweringOptions &setLowerToHighLevelFIR(bool v) { - lowerToHighLevelFIR = v; - return *this; - } + LoweringOptions(); }; } // namespace Fortran::lower diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index cfc2e28aee344..183bf6478e75c 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -17,6 +17,7 @@ add_flang_library(FortranLower IntrinsicCall.cpp IO.cpp IterationSpace.cpp + LoweringOptions.cpp Mangler.cpp OpenACC.cpp OpenMP.cpp diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index 190c5619dface..20258f37a8126 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -829,7 +829,7 @@ class Fortran::lower::CallInterfaceImpl { if (cat == Fortran::common::TypeCategory::Derived) { // TODO is kept under experimental flag until feature is complete. 
if (dynamicType.IsPolymorphic() && - !getConverter().getLoweringOptions().isPolymorphicTypeImplEnabled()) + !getConverter().getLoweringOptions().getPolymorphicTypeImpl()) TODO(interface.converter.getCurrentLocation(), "support for polymorphic types"); diff --git a/flang/lib/Lower/ConvertType.cpp b/flang/lib/Lower/ConvertType.cpp index 1d838df2022a9..e9a2e339e7876 100644 --- a/flang/lib/Lower/ConvertType.cpp +++ b/flang/lib/Lower/ConvertType.cpp @@ -234,8 +234,7 @@ struct TypeBuilder { translateLenParameters(params, tySpec->category(), ultimate); ty = genFIRType(context, tySpec->category(), kind, params); } else if (type->IsPolymorphic() && - !converter.getLoweringOptions() - .isPolymorphicTypeImplEnabled()) { + !converter.getLoweringOptions().getPolymorphicTypeImpl()) { // TODO is kept under experimental flag until feature is complete. TODO(loc, "support for polymorphic types"); } else if (type->IsUnlimitedPolymorphic()) { diff --git a/flang/lib/Lower/LoweringOptions.cpp b/flang/lib/Lower/LoweringOptions.cpp new file mode 100644 index 0000000000000..22247faa4cab9 --- /dev/null +++ b/flang/lib/Lower/LoweringOptions.cpp @@ -0,0 +1,23 @@ +//===--- LoweringOptions.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "flang/Lower/LoweringOptions.h" + +namespace Fortran::lower { + +LoweringOptions::LoweringOptions() { +#define LOWERINGOPT(Name, Bits, Default) Name = Default; +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) set##Name(Default); +#include "flang/Lower/LoweringOptions.def" +} + +} // namespace Fortran::lower diff --git a/flang/unittests/Frontend/CMakeLists.txt b/flang/unittests/Frontend/CMakeLists.txt index 739412c7888c4..0a05b3ffd743e 100644 --- a/flang/unittests/Frontend/CMakeLists.txt +++ b/flang/unittests/Frontend/CMakeLists.txt @@ -12,6 +12,7 @@ target_link_libraries(FlangFrontendTests clangBasic flangFrontend flangFrontendTool + FortranLower FortranParser FortranSemantics FortranCommon From 00b09a7b18abb253d36b3d3e1c546007288f6e89 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 3 Nov 2022 00:01:51 -0500 Subject: [PATCH 109/516] Revert "[AAPointerInfo] refactor how offsets and Access objects are tracked" This reverts commit b756096b0cbef0918394851644649b3c28a886e2. 
See regression https://github.com/llvm/llvm-project/issues/58774 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 61 +---- llvm/lib/Transforms/IPO/Attributor.cpp | 2 +- .../Transforms/IPO/AttributorAttributes.cpp | 244 +++++++++--------- .../Attributor/call-simplify-pointer-info.ll | 154 +---------- .../Attributor/value-simplify-pointer-info.ll | 4 - 5 files changed, 145 insertions(+), 320 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 69e52581e1d97..61c26dfabed0b 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -250,22 +250,6 @@ struct OffsetAndSize { return OAS.Offset + OAS.Size > Offset && OAS.Offset < Offset + Size; } - OffsetAndSize &operator&=(const OffsetAndSize &R) { - if (Offset == Unassigned) - Offset = R.Offset; - else if (R.Offset != Unassigned && R.Offset != Offset) - Offset = Unknown; - - if (Size == Unassigned) - Size = R.Size; - else if (Size == Unknown || R.Size == Unknown) - Size = Unknown; - else if (R.Size != Unassigned) - Size = std::max(Size, R.Size); - - return *this; - } - /// Constants used to represent special offsets or sizes. /// - This assumes that Offset and Size are non-negative. /// - The constants should not clash with DenseMapInfo, such as EmptyKey @@ -5008,47 +4992,33 @@ struct AAPointerInfo : public AbstractAttribute { /// An access description. 
struct Access { - Access(Instruction *I, int64_t Offset, int64_t Size, - Optional Content, AccessKind Kind, Type *Ty) - : LocalI(I), RemoteI(I), Content(Content), OAS(Offset, Size), - Kind(Kind), Ty(Ty) { + Access(Instruction *I, Optional Content, AccessKind Kind, Type *Ty) + : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) { verify(); } - Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset, - int64_t Size, Optional Content, AccessKind Kind, Type *Ty) - : LocalI(LocalI), RemoteI(RemoteI), Content(Content), OAS(Offset, Size), - Kind(Kind), Ty(Ty) { + Access(Instruction *LocalI, Instruction *RemoteI, Optional Content, + AccessKind Kind, Type *Ty) + : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), + Ty(Ty) { verify(); } Access(const Access &Other) = default; Access(const Access &&Other) : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), - OAS(Other.OAS), Kind(Other.Kind), Ty(Other.Ty) {} + Kind(Other.Kind), Ty(Other.Ty) {} Access &operator=(const Access &Other) = default; bool operator==(const Access &R) const { - return LocalI == R.LocalI && RemoteI == R.RemoteI && OAS == R.OAS && + return LocalI == R.LocalI && RemoteI == R.RemoteI && Content == R.Content && Kind == R.Kind; } bool operator!=(const Access &R) const { return !(*this == R); } Access &operator&=(const Access &R) { assert(RemoteI == R.RemoteI && "Expected same instruction!"); - assert(LocalI == R.LocalI && "Expected same instruction!"); + Content = + AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); Kind = AccessKind(Kind | R.Kind); - auto Before = OAS; - OAS &= R.OAS; - if (Before.isUnassigned() || Before == OAS) { - Content = - AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); - } else { - // Since the OAS information changed, set a conservative state -- drop - // the contents, and assume MayAccess rather than MustAccess. 
- Content.reset(); - Kind = AccessKind(Kind | AK_MAY); - Kind = AccessKind(Kind & ~AK_MUST); - } - verify(); return *this; } @@ -5096,12 +5066,6 @@ struct AAPointerInfo : public AbstractAttribute { /// determined. Optional getContent() const { return Content; } - /// Return the offset for this access. - int64_t getOffset() const { return OAS.Offset; } - - /// Return the size for this access. - int64_t getSize() const { return OAS.Size; } - private: /// The instruction responsible for the access with respect to the local /// scope of the associated attribute. @@ -5114,9 +5078,6 @@ struct AAPointerInfo : public AbstractAttribute { /// cannot be determined. Optional Content; - /// The object accessed, in terms of an offset and size in bytes. - AA::OffsetAndSize OAS; - /// The access kind, e.g., READ, as bitset (could be more than one). AccessKind Kind; @@ -5152,7 +5113,7 @@ struct AAPointerInfo : public AbstractAttribute { virtual bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref CB, bool &HasBeenWrittenTo, - AA::OffsetAndSize &OAS) const = 0; + AA::OffsetAndSize *OASPtr = nullptr) const = 0; /// This function should return true if the type of the \p AA is AAPointerInfo static bool classof(const AbstractAttribute *AA) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 2935127dcbc48..e8d7fe2212251 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -457,7 +457,7 @@ static bool getPotentialCopiesOfMemoryValue( auto &PI = A.getAAFor(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess, - HasBeenWrittenTo, OAS)) { + HasBeenWrittenTo, &OAS)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 
cbc1f8b77c02f..c1850f7378127 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -757,6 +757,13 @@ struct AccessAsInstructionInfo : DenseMapInfo { /// A type to track pointer/struct usage and accesses for AAPointerInfo. struct AA::PointerInfo::State : public AbstractState { + + ~State() { + // We do not delete the Accesses objects but need to destroy them still. + for (auto &It : AccessBins) + It.second->~Accesses(); + } + /// Return the best possible representable state. static State getBestState(const State &SIS) { return State(); } @@ -768,7 +775,9 @@ struct AA::PointerInfo::State : public AbstractState { } State() = default; - State(State &&SIS) = default; + State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) { + SIS.AccessBins.clear(); + } const State &getAssumed() const { return *this; } @@ -794,9 +803,7 @@ struct AA::PointerInfo::State : public AbstractState { if (this == &R) return *this; BS = R.BS; - AccessList = R.AccessList; - OffsetBins = R.OffsetBins; - RemoteIMap = R.RemoteIMap; + AccessBins = R.AccessBins; return *this; } @@ -804,52 +811,99 @@ struct AA::PointerInfo::State : public AbstractState { if (this == &R) return *this; std::swap(BS, R.BS); - std::swap(AccessList, R.AccessList); - std::swap(OffsetBins, R.OffsetBins); - std::swap(RemoteIMap, R.RemoteIMap); + std::swap(AccessBins, R.AccessBins); return *this; } - /// Add a new Access to the state at offset \p Offset and with size \p Size. 
+ bool operator==(const State &R) const { + if (BS != R.BS) + return false; + if (AccessBins.size() != R.AccessBins.size()) + return false; + auto It = begin(), RIt = R.begin(), E = end(); + while (It != E) { + if (It->getFirst() != RIt->getFirst()) + return false; + auto &Accs = It->getSecond(); + auto &RAccs = RIt->getSecond(); + if (Accs->size() != RAccs->size()) + return false; + for (const auto &ZipIt : llvm::zip(*Accs, *RAccs)) + if (std::get<0>(ZipIt) != std::get<1>(ZipIt)) + return false; + ++It; + ++RIt; + } + return true; + } + bool operator!=(const State &R) const { return !(*this == R); } + + /// We store accesses in a set with the instruction as key. + struct Accesses { + SmallVector Accesses; + DenseMap Map; + + unsigned size() const { return Accesses.size(); } + + using vec_iterator = decltype(Accesses)::iterator; + vec_iterator begin() { return Accesses.begin(); } + vec_iterator end() { return Accesses.end(); } + + using iterator = decltype(Map)::const_iterator; + iterator find(AAPointerInfo::Access &Acc) { + return Map.find(Acc.getRemoteInst()); + } + iterator find_end() { return Map.end(); } + + AAPointerInfo::Access &get(iterator &It) { + return Accesses[It->getSecond()]; + } + + void insert(AAPointerInfo::Access &Acc) { + Map[Acc.getRemoteInst()] = Accesses.size(); + Accesses.push_back(Acc); + } + }; + + /// We store all accesses in bins denoted by their offset and size. + using AccessBinsTy = DenseMap; + + AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); } + AccessBinsTy::const_iterator end() const { return AccessBins.end(); } + +protected: + /// The bins with all the accesses for the associated pointer. + AccessBinsTy AccessBins; + + /// Add a new access to the state at offset \p Offset and with size \p Size. /// The access is associated with \p I, writes \p Content (if anything), and - /// is of kind \p Kind. 
If an Access already exists for the same \p I and same - /// \p RemoteI, the two are combined, potentially losing information about - /// offset and size. The resulting access must now be moved from its original - /// OffsetBin to the bin for its new offset. - /// + /// is of kind \p Kind. /// \Returns CHANGED, if the state changed, UNCHANGED otherwise. ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size, Instruction &I, Optional Content, AAPointerInfo::AccessKind Kind, Type *Ty, - Instruction *RemoteI = nullptr); - - using OffsetBinsTy = DenseMap>; - - using const_bin_iterator = OffsetBinsTy::const_iterator; - const_bin_iterator begin() const { return OffsetBins.begin(); } - const_bin_iterator end() const { return OffsetBins.end(); } - - const AAPointerInfo::Access &getAccess(unsigned Index) const { - return AccessList[Index]; + Instruction *RemoteI = nullptr, + Accesses *BinPtr = nullptr) { + AA::OffsetAndSize Key{Offset, Size}; + Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key]; + if (!Bin) + Bin = new (A.Allocator) Accesses; + AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty); + // Check if we have an access for this instruction in this bin, if not, + // simply add it. + auto It = Bin->find(Acc); + if (It == Bin->find_end()) { + Bin->insert(Acc); + return ChangeStatus::CHANGED; + } + // If the existing access is the same as then new one, nothing changed. + AAPointerInfo::Access &Current = Bin->get(It); + AAPointerInfo::Access Before = Current; + // The new one will be combined with the existing one. + Current &= Acc; + return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } -protected: - // Every memory instruction results in an Access object. We maintain a list of - // all Access objects that we own, along with the following maps: - // - // - OffsetBins: OffsetAndSize -> { Access } - // - RemoteIMap: RemoteI x LocalI -> Access - // - // A RemoteI is any instruction that accesses memory. 
RemoteI is different - // from LocalI if and only if LocalI is a call; then RemoteI is some - // instruction in the callgraph starting from LocalI. Multiple paths in the - // callgraph from LocalI to RemoteI may produce multiple accesses, but these - // are all combined into a single Access object. This may result in loss of - // information in OffsetAndSize in the Access object. - SmallVector AccessList; - OffsetBinsTy OffsetBins; - DenseMap> RemoteIMap; - /// See AAPointerInfo::forallInterferingAccesses. bool forallInterferingAccesses( AA::OffsetAndSize OAS, @@ -857,16 +911,14 @@ struct AA::PointerInfo::State : public AbstractState { if (!isValidState()) return false; - for (const auto &It : OffsetBins) { + for (const auto &It : AccessBins) { AA::OffsetAndSize ItOAS = It.getFirst(); if (!OAS.mayOverlap(ItOAS)) continue; bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); - for (auto Index : It.getSecond()) { - auto &Access = AccessList[Index]; + for (auto &Access : *It.getSecond()) if (!CB(Access, IsExact)) return false; - } } return true; } @@ -875,19 +927,32 @@ struct AA::PointerInfo::State : public AbstractState { bool forallInterferingAccesses( Instruction &I, function_ref CB, - AA::OffsetAndSize &OAS) const { + AA::OffsetAndSize *OASPtr) const { if (!isValidState()) return false; - auto LocalList = RemoteIMap.find(&I); - if (LocalList == RemoteIMap.end()) { - return true; + // First find the offset and size of I. + AA::OffsetAndSize OAS; + for (const auto &It : AccessBins) { + for (auto &Access : *It.getSecond()) { + if (Access.getRemoteInst() == &I) { + OAS = It.getFirst(); + break; + } + } + if (OAS.Size != AA::OffsetAndSize::Unassigned) + break; } - for (auto LI : LocalList->getSecond()) { - auto &Access = AccessList[LI]; - OAS &= {Access.getOffset(), Access.getSize()}; - } + if (OASPtr) + *OASPtr = OAS; + + // No access for I was found, we are done. 
+ if (OAS.Size == AA::OffsetAndSize::Unassigned) + return true; + + // Now that we have an offset and size, find all overlapping ones and use + // the callback on the accesses. return forallInterferingAccesses(OAS, CB); } @@ -896,56 +961,6 @@ struct AA::PointerInfo::State : public AbstractState { BooleanState BS; }; -ChangeStatus AA::PointerInfo::State::addAccess(Attributor &A, int64_t Offset, - int64_t Size, Instruction &I, - Optional Content, - AAPointerInfo::AccessKind Kind, - Type *Ty, Instruction *RemoteI) { - RemoteI = RemoteI ? RemoteI : &I; - AAPointerInfo::Access Acc(&I, RemoteI, Offset, Size, Content, Kind, Ty); - - // Check if we have an access for this instruction, if not, simply add it. - auto &LocalList = RemoteIMap[RemoteI]; - bool AccExists = false; - unsigned AccIndex = AccessList.size(); - for (auto Index : LocalList) { - auto &A = AccessList[Index]; - if (A.getLocalInst() == &I) { - AccExists = true; - AccIndex = Index; - break; - } - } - if (!AccExists) { - AccessList.push_back(Acc); - LocalList.push_back(AccIndex); - } else { - // The new one will be combined with the existing one. 
- auto &Current = AccessList[AccIndex]; - auto Before = Current; - Current &= Acc; - if (Current == Before) - return ChangeStatus::UNCHANGED; - - Acc = Current; - AA::OffsetAndSize Key{Before.getOffset(), Before.getSize()}; - assert(OffsetBins.count(Key) && "Existing Access must be in some bin."); - auto &Bin = OffsetBins[Key]; - assert(Bin.count(AccIndex) && - "Expected bin to actually contain the Access."); - LLVM_DEBUG(dbgs() << "[AAPointerInfo] Removing Access " - << AccessList[AccIndex] << " with key {" << Key.Offset - << ',' << Key.Size << "}\n"); - Bin.erase(AccIndex); - } - - AA::OffsetAndSize Key{Acc.getOffset(), Acc.getSize()}; - LLVM_DEBUG(dbgs() << "[AAPointerInfo] Inserting Access " << Acc - << " with key {" << Key.Offset << ',' << Key.Size << "}\n"); - OffsetBins[Key].insert(AccIndex); - return ChangeStatus::CHANGED; -} - namespace { struct AAPointerInfoImpl : public StateWrapper { @@ -956,7 +971,7 @@ struct AAPointerInfoImpl const std::string getAsStr() const override { return std::string("PointerInfo ") + (isValidState() ? (std::string("#") + - std::to_string(OffsetBins.size()) + " bins") + std::to_string(AccessBins.size()) + " bins") : ""); } @@ -975,7 +990,7 @@ struct AAPointerInfoImpl bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref UserCB, bool &HasBeenWrittenTo, - AA::OffsetAndSize &OAS) const override { + AA::OffsetAndSize *OASPtr = nullptr) const override { HasBeenWrittenTo = false; SmallPtrSet DominatingWrites; @@ -1090,7 +1105,7 @@ struct AAPointerInfoImpl InterferingAccesses.push_back({&Acc, Exact}); return true; }; - if (!State::forallInterferingAccesses(I, AccessCB, OAS)) + if (!State::forallInterferingAccesses(I, AccessCB, OASPtr)) return false; if (HasBeenWrittenTo) { @@ -1157,15 +1172,14 @@ struct AAPointerInfoImpl // Combine the accesses bin by bin. 
ChangeStatus Changed = ChangeStatus::UNCHANGED; - const auto &State = OtherAAImpl.getState(); - for (const auto &It : State) { + for (const auto &It : OtherAAImpl.getState()) { AA::OffsetAndSize OAS = AA::OffsetAndSize::getUnknown(); if (Offset != AA::OffsetAndSize::Unknown && !It.first.offsetOrSizeAreUnknown()) { OAS = AA::OffsetAndSize(It.first.Offset + Offset, It.first.Size); } - for (auto Index : It.getSecond()) { - const auto &RAcc = State.getAccess(Index); + Accesses *Bin = AccessBins.lookup(OAS); + for (const AAPointerInfo::Access &RAcc : *It.second) { if (IsByval && !RAcc.isRead()) continue; bool UsedAssumedInformation = false; @@ -1178,8 +1192,9 @@ struct AAPointerInfoImpl AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); } - Changed = Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK, - RAcc.getType(), RAcc.getRemoteInst()); + Changed = + Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK, + RAcc.getType(), RAcc.getRemoteInst(), Bin); } } return Changed; @@ -1191,11 +1206,10 @@ struct AAPointerInfoImpl /// Dump the state into \p O. 
void dumpState(raw_ostream &O) { - for (auto &It : OffsetBins) { + for (auto &It : AccessBins) { O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size - << "] : " << It.getSecond().size() << "\n"; - for (auto AccIndex : It.getSecond()) { - auto &Acc = AccessList[AccIndex]; + << "] : " << It.getSecond()->size() << "\n"; + for (auto &Acc : *It.getSecond()) { O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; if (Acc.getLocalInst() != Acc.getRemoteInst()) O << " --> " << *Acc.getRemoteInst() diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll index 30a8283475178..e75489a03ceac 100644 --- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CGSCC ; @@ -53,7 +53,7 @@ define i8 @call_simplifiable_1() { ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) 
#[[ATTR3:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -64,68 +64,6 @@ entry: ret i8 %r } -;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. - -define internal i8 @read_arg_1(i8* %p) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@read_arg_1 -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 -; CGSCC-NEXT: ret i8 [[L]] -; -entry: - %l = load i8, i8* %p, align 1 - ret i8 %l -} - -define internal i8 @sum_two_same_loads(i8* %p) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] { -; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]] -; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]] -; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] -; CGSCC-NEXT: ret i8 [[Z]] -; - %x = call i8 @read_arg_1(i8* %p) - %y = call i8 @read_arg_1(i8* %p) - %z = add nsw i8 %x, %y - ret i8 %z -} - -define i8 @call_simplifiable_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2 -; TUNIT-SAME: () #[[ATTR1]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], 
i64 0, i64 3 -; TUNIT-NEXT: ret i8 4 -; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn -; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_2 -; CGSCC-SAME: () #[[ATTR1]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 -; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] -; CGSCC-NEXT: ret i8 [[R]] -; -entry: - %Bytes = alloca [1024 x i8], align 16 - %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 - store i8 2, i8* %i0 - %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 - store i8 3, i8* %i1 - %r = call i8 @sum_two_same_loads(i8* %i0) - ret i8 %r -} - define i8 @call_not_simplifiable_1() { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_1 @@ -144,7 +82,7 @@ define i8 @call_not_simplifiable_1() { ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -155,89 +93,6 @@ entry: ret i8 %r } -;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. 
- -define internal i8 @read_arg_2(i8* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; TUNIT-LABEL: define {{[^@]+}}@read_arg_2 -; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[P:%.*]]) #[[ATTR0]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 -; TUNIT-NEXT: ret i8 [[L]] -; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@read_arg_2 -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 -; CGSCC-NEXT: ret i8 [[L]] -; -entry: - %l = load i8, i8* %p, align 1 - ret i8 %l -} - -define internal i8 @sum_two_different_loads(i8* %p, i8* %q) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads -; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR0]] { -; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P]]) #[[ATTR2]] -; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR2]] -; TUNIT-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] -; TUNIT-NEXT: ret i8 [[Z]] -; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR2]] { -; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) 
[[P]]) #[[ATTR3]] -; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR3]] -; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] -; CGSCC-NEXT: ret i8 [[Z]] -; - %x = call i8 @read_arg_2(i8* %p) - %y = call i8 @read_arg_2(i8* %q) - %z = add nsw i8 %x, %y - ret i8 %z -} - -define i8 @call_not_simplifiable_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_2 -; TUNIT-SAME: () #[[ATTR1]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; TUNIT-NEXT: store i8 2, i8* [[I0]], align 2 -; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; TUNIT-NEXT: store i8 3, i8* [[I1]], align 1 -; TUNIT-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR2]] -; TUNIT-NEXT: ret i8 [[R]] -; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn -; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_2 -; CGSCC-SAME: () #[[ATTR1]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 -; CGSCC-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* 
nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR3]] -; CGSCC-NEXT: ret i8 [[R]] -; -entry: - %Bytes = alloca [1024 x i8], align 16 - %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 - store i8 2, i8* %i0 - %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 - store i8 3, i8* %i1 - %base = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 - %r = call i8 @sum_two_different_loads(i8* %i0, i8* %i1) - ret i8 %r -} - ;. ; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } ; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } @@ -245,6 +100,5 @@ entry: ;. ; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } ; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index a8d1f549dfed4..cd992da4c9f98 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -2223,7 +2223,6 @@ define i8 @phi_no_store_2() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2242,7 +2241,6 @@ define i8 @phi_no_store_2() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2283,7 +2281,6 @@ define i8 @phi_no_store_3() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2305,7 +2302,6 @@ define i8 @phi_no_store_3() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: 
[[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 From 0041b92cefbb550dd4e754a1ff44337fc32698a2 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 3 Nov 2022 08:12:19 +0100 Subject: [PATCH 110/516] [mlir][Bazel] Add mlir-translate as data for dialect tests. --- utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel index bbf8a100475a5..6e997e9f47e77 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel @@ -11,6 +11,7 @@ package(default_visibility = ["//visibility:public"]) data = [ "//llvm:llvm-symbolizer", "//mlir:mlir-opt", + "//mlir:mlir-translate", "//mlir/test:lit_data", ], ) From a20112a74cb34fa967d10e07185167cbc2906c0d Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Fri, 28 Oct 2022 15:51:20 +0100 Subject: [PATCH 111/516] [AArch64]SME2 instructions that use ZT0 operand This patch adds the assembly/disassembly for the following instructions: ZERO (ZT0): Zero ZT0. LDR (ZT0): Load ZT0 register. STR (ZT0): Store ZT0 register. MOVT (scalar to ZT0): Move 8 bytes from general-purpose register to ZT0. (ZT0 to scalar): Move 8 bytes from ZT0 to general-purpose register. Consecutive: LUTI2 (single): Lookup table read with 2-bit indexes. (two registers): Lookup table read with 2-bit indexes. (four registers): Lookup table read with 2-bit indexes. LUTI4 (single): Lookup table read with 4-bit indexes. (two registers): Lookup table read with 4-bit indexes. (four registers): Lookup table read with 4-bit indexes. 
The reference can be found here: https://developer.arm.com/documentation/ddi0602/2022-09 This patch also adds a new register class and operand for zt0 and another index operand uimm3s8 Differential Revision: https://reviews.llvm.org/D136088 --- .../lib/Target/AArch64/AArch64InstrFormats.td | 12 + .../lib/Target/AArch64/AArch64RegisterInfo.td | 6 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 ++ .../AArch64/AsmParser/AArch64AsmParser.cpp | 58 ++++- .../Disassembler/AArch64Disassembler.cpp | 3 + .../MCTargetDesc/AArch64InstPrinter.cpp | 5 +- .../AArch64/MCTargetDesc/AArch64InstPrinter.h | 1 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 188 ++++++++++++++ llvm/test/MC/AArch64/SME2/ldr-diagnostics.s | 6 + llvm/test/MC/AArch64/SME2/ldr.s | 38 +++ llvm/test/MC/AArch64/SME2/luti2-diagnostics.s | 60 +++++ llvm/test/MC/AArch64/SME2/luti2.s | 238 ++++++++++++++++++ llvm/test/MC/AArch64/SME2/luti4-diagnostics.s | 60 +++++ llvm/test/MC/AArch64/SME2/luti4.s | 213 ++++++++++++++++ llvm/test/MC/AArch64/SME2/movt-diagnostics.s | 32 +++ llvm/test/MC/AArch64/SME2/movt.s | 63 +++++ llvm/test/MC/AArch64/SME2/str-diagnostics.s | 6 + llvm/test/MC/AArch64/SME2/str.s | 38 +++ llvm/test/MC/AArch64/SME2/zero.s | 20 ++ 19 files changed, 1059 insertions(+), 4 deletions(-) create mode 100644 llvm/test/MC/AArch64/SME2/ldr-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/ldr.s create mode 100644 llvm/test/MC/AArch64/SME2/luti2-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/luti2.s create mode 100644 llvm/test/MC/AArch64/SME2/luti4-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/luti4.s create mode 100644 llvm/test/MC/AArch64/SME2/movt-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/movt.s create mode 100644 llvm/test/MC/AArch64/SME2/str-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/str.s create mode 100644 llvm/test/MC/AArch64/SME2/zero.s diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 19ff399d5ed4a..b7c1cb6a47831 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1408,6 +1408,18 @@ def sme_elm_idx0_15 : Operand, TImmLeaf; + +def uimm3s8 : Operand, ImmLeaf= 0 && Imm <= 56 && ((Imm % 8) == 0); }], UImmS8XForm> { + let PrintMethod = "printVectorIndex<8>"; + let ParserMatchClass = UImm3s8Operand; +} + class UImmScaledMemoryIndexedRange : AsmOperandClass { let Name = "UImm" # Width # "s" # Scale # "Range"; let DiagnosticType = "InvalidMemoryIndexedRange" # Scale # "UImm" # Width; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 8a41cd5c4d49b..b541deb837795 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1390,6 +1390,8 @@ let SubRegIndices = [zasubb] in { def ZA : AArch64Reg<0, "za", [ZAB0]>; } +def ZT0 : AArch64Reg<0, "zt0">; + // SME Register Classes let isAllocatable = 0 in { @@ -1416,6 +1418,10 @@ let isAllocatable = 0 in { } } +def ZTR : RegisterClass<"AArch64", [untyped], 512, (add ZT0)> { + let Size = 512; + let DiagnosticType = "InvalidLookupTable"; +} // SME Register Operands // There are three types of SME matrix register operands: // * Tiles: diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 5a8149cf38500..55dc210c22fb7 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -559,6 +559,22 @@ defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001>; defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100>; defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101>; + +def ZERO_T : sme2_zero_zt<"zero", 0b0001>; + +def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>; +def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>; + +def MOVT_XTI : 
sme2_movt_zt_to_scalar<"movt", 0b0011111>; +def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>; + +defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2">; +defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">; +defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">; + +defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">; +defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">; +defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">; } let Predicates = [HasSME2, HasSMEI16I64] in { diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 49d54f8e9a849..578c5140fd30b 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -70,7 +70,8 @@ enum class RegKind { SVEDataVector, SVEPredicateAsCounter, SVEPredicateVector, - Matrix + Matrix, + LookupTable }; enum class MatrixKind { Array, Tile, Row, Col }; @@ -265,6 +266,7 @@ class AArch64AsmParser : public MCTargetAsmParser { template OperandMatchResultTy tryParseGPROperand(OperandVector &Operands); + OperandMatchResultTy tryParseZTOperand(OperandVector &Operands); template OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands); template @@ -2786,9 +2788,12 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name, if ((RegNum = matchMatrixRegName(Name))) return Kind == RegKind::Matrix ? RegNum : 0; + if (Name.equals_insensitive("zt0")) + return Kind == RegKind::LookupTable ? AArch64::ZT0 : 0; + // The parsed register must be of RegKind Scalar if ((RegNum = MatchRegisterName(Name))) - return Kind == RegKind::Scalar ? RegNum : 0; + return (Kind == RegKind::Scalar) ? RegNum : 0; if (!RegNum) { // Handle a few common aliases of registers. 
@@ -3966,6 +3971,9 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) { if (!tryParseNeonVectorRegister(Operands)) return false; + if (tryParseZTOperand(Operands) == MatchOperand_Success) + return false; + // Otherwise try for a scalar register. if (tryParseGPROperand(Operands) == MatchOperand_Success) return false; @@ -4179,6 +4187,10 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands, llvm_unreachable("Expected a valid vector kind"); } + if (RegTok.is(AsmToken::Identifier) && ParseRes == MatchOperand_NoMatch && + RegTok.getString().equals_insensitive("zt0")) + return MatchOperand_NoMatch; + if (RegTok.isNot(AsmToken::Identifier) || ParseRes == MatchOperand_ParseFail || (ParseRes == MatchOperand_NoMatch && NoMatchIsError && @@ -4328,6 +4340,42 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { return MatchOperand_Success; } +OperandMatchResultTy +AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) { + SMLoc StartLoc = getLoc(); + const AsmToken &Tok = getTok(); + StringRef Name = Tok.getString().lower(); + + unsigned RegNum = matchRegisterNameAlias(Name, RegKind::LookupTable); + + if (RegNum == 0) + return MatchOperand_NoMatch; + + Operands.push_back(AArch64Operand::CreateReg( + RegNum, RegKind::LookupTable, StartLoc, getLoc(), getContext())); + Lex(); // Eat identifier token. 
+ + // Check if register is followed by an index + if (parseOptionalToken(AsmToken::LBrac)) { + const MCExpr *ImmVal; + if (getParser().parseExpression(ImmVal)) + return MatchOperand_NoMatch; + const MCConstantExpr *MCE = dyn_cast(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return MatchOperand_ParseFail; + } + if (parseToken(AsmToken::RBrac, "']' expected")) + return MatchOperand_ParseFail; + + Operands.push_back(AArch64Operand::CreateImm( + MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc, + getLoc(), getContext())); + } + + return MatchOperand_Success; +} + template OperandMatchResultTy AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) { @@ -5434,6 +5482,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, return Error(Loc, "index must be a multiple of 16 in range [-1024, 1008]."); case Match_InvalidMemoryIndexed8UImm5: return Error(Loc, "index must be a multiple of 8 in range [0, 248]."); + case Match_InvalidMemoryIndexed8UImm3: + return Error(Loc, "index must be a multiple of 8 in range [0, 56]."); case Match_InvalidMemoryIndexed4UImm5: return Error(Loc, "index must be a multiple of 4 in range [0, 124]."); case Match_InvalidMemoryIndexed2UImm5: @@ -5762,6 +5812,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, return Error(Loc, "Invalid vector list, expected list with 4 consecutive " "SVE vectors, where the first vector is a multiple of 4 " "and with matching element types"); + case Match_InvalidLookupTable: + return Error(Loc, "Invalid lookup table, expected zt0"); default: llvm_unreachable("unexpected error code!"); } @@ -6176,6 +6228,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidMemoryIndexed8SImm7: case Match_InvalidMemoryIndexed16SImm7: case Match_InvalidMemoryIndexed8UImm5: + case Match_InvalidMemoryIndexed8UImm3: case Match_InvalidMemoryIndexed4UImm5: case Match_InvalidMemoryIndexed2UImm5: case 
Match_InvalidMemoryIndexed1UImm6: @@ -6318,6 +6371,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidSVCR: case Match_InvalidMatrixIndexGPR32_12_15: case Match_InvalidMatrixIndexGPR32_8_11: + case Match_InvalidLookupTable: case Match_InvalidSVEVectorListMul2x8: case Match_InvalidSVEVectorListMul2x16: case Match_InvalidSVEVectorListMul2x32: diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 1438f026f6792..0ef906b583c06 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -329,6 +329,9 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, case AArch64::MPR8RegClassID: MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0)); break; + case AArch64::ZTRRegClassID: + MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZT0)); + break; } } else if (Desc.OpInfo[i].OperandType == AArch64::OPERAND_IMPLICIT_IMM_0) { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 9324ee41c3123..c11150a5a1230 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1244,7 +1244,7 @@ void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, O << ']'; } -template +template void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1576,10 +1576,11 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, printVectorList(MI, OpNum, STI, O, Suffix); } +template void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { - O << "[" << MI->getOperand(OpNum).getImm() << "]"; + O << "[" << Scale * 
MI->getOperand(OpNum).getImm() << "]"; } void AArch64InstPrinter::printMatrixIndex(const MCInst *MI, unsigned OpNum, diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index 7bfb095b5873c..aa4aad1dc7fc7 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -164,6 +164,7 @@ class AArch64InstPrinter : public MCInstPrinter { void printTypedVectorList(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template void printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printMatrixIndex(const MCInst *MI, unsigned OpNum, diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 98f1c861a5c26..b3941fdaeaa61 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -2379,3 +2379,191 @@ multiclass sme2_int_mopx_tile op> { multiclass sme2_bfp_mopx_tile op> { def NAME : sme_outer_product_widening_inst; } + +//===----------------------------------------------------------------------===/// +// SME2 Zero Lookup Table. 
+class sme2_zero_zt opc> + : I<(outs ZTR:$ZT), (ins ), + mnemonic, "\t\\{ $ZT \\}", + "", []>, Sched<[]> { + let Inst{31-4} = 0b1100000001001000000000000000; + let Inst{3-0} = opc; +} + +//===----------------------------------------------------------------------===// +// SME2 lookup table load/store +class sme2_spill_fill_vector opc> + : I, Sched<[]> { + bits<5> Rn; + let Inst{31-22} = 0b1110000100; + let Inst{21-16} = opc{7-2}; + let Inst{15-10} = 0b100000; + let Inst{9-5} = Rn; + let Inst{4-2} = 0b000; + let Inst{1-0} = opc{1-0}; + + let mayLoad = !not(opc{7}); + let mayStore = opc{7}; +} + +//===----------------------------------------------------------------------===/// +// SME2 move to/from lookup table +class sme2_movt_zt_to_scalar opc> + : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3), + mnemonic, "\t$Rt, $ZTt$imm3", + "", []>, Sched<[]> { + bits<3> imm3; + bits<5> Rt; + let Inst{31-15} = 0b11000000010011000; + let Inst{14-12} = imm3; + let Inst{11-5} = opc; + let Inst{4-0} = Rt; +} + +class sme2_movt_scalar_to_zt opc> + : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt), + mnemonic, "\t$ZTt$imm3, $Rt", + "", []>, Sched<[]> { + bits<3> imm3; + bits<5> Rt; + let Inst{31-15} = 0b11000000010011100; + let Inst{14-12} = imm3; + let Inst{11-5} = opc; + let Inst{4-0} = Rt; +} + +//===----------------------------------------------------------------------===// +// SME2 lookup table expand one register +class sme2_luti_vector_index sz, bits<7> opc, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<5> Zd; + let Inst{31-19} = 0b1100000011001; + let Inst{18-14} = opc{6-2}; + let Inst{13-12} = sz; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +class sme2_luti2_vector_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_index { + bits<4> i; 
+ let Inst{17-14} = i; +} + +multiclass sme2_luti2_vector_index { + def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>; + def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>; + def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>; +} + +class sme2_luti4_vector_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_index { + bits<3> i; + let Inst{16-14} = i; +} + +multiclass sme2_luti4_vector_index { + def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>; + def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>; + def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>; +} + +// SME2 lookup table expand two contiguous registers +class sme2_luti_vector_vg2_index sz, bits<6> opc, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<4> Zd; + let Inst{31-19} = 0b1100000010001; + let Inst{18-15} = opc{5-2}; + let Inst{14} = 0b1; + let Inst{13-12} = sz; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + +class sme2_luti2_vector_vg2_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg2_index { + bits<3> i; + let Inst{17-15} = i; +} + +multiclass sme2_luti2_vector_vg2_index { + def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; +} + +class sme2_luti4_vector_vg2_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg2_index { + bits<2> i; + let Inst{16-15} = i; +} + +multiclass sme2_luti4_vector_vg2_index { + def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; +} + +// SME2 
lookup table expand four contiguous registers +class sme2_luti_vector_vg4_index sz, bits<5>opc, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<3> Zd; + let Inst{31-19} = 0b1100000010001; + let Inst{18-16} = opc{4-2}; + let Inst{15-14} = 0b10; + let Inst{13-12} = sz; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = Zn; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +class sme2_luti2_vector_vg4_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg4_index { + bits<2> i; + let Inst{17-16} = i; +} + +multiclass sme2_luti2_vector_vg4_index { + def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>; + def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; +} + +class sme2_luti4_vector_vg4_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg4_index { + bits<1> i; + let Inst{16} = i; +} + +multiclass sme2_luti4_vector_vg4_index { + def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; +} diff --git a/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s b/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s new file mode 100644 index 0000000000000..29c19316529fc --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +ldr zt1, [x0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid lookup table, expected zt0 +// CHECK-NEXT: ldr zt1, [x0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/ldr.s b/llvm/test/MC/AArch64/SME2/ldr.s new file mode 100644 index 0000000000000..4038b39ad3f92 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldr.s @@ -0,0 +1,38 @@ 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ldr zt0, [x0] // 11100001-00011111-10000000-00000000 +// CHECK-INST: ldr zt0, [x0] +// CHECK-ENCODING: [0x00,0x80,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f8000 + +ldr zt0, [x10] // 11100001-00011111-10000001-01000000 +// CHECK-INST: ldr zt0, [x10] +// CHECK-ENCODING: [0x40,0x81,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f8140 + +ldr zt0, [x13] // 11100001-00011111-10000001-10100000 +// CHECK-INST: ldr zt0, [x13] +// CHECK-ENCODING: [0xa0,0x81,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f81a0 + +ldr zt0, [sp] // 11100001-00011111-10000011-11100000 +// CHECK-INST: ldr zt0, [sp] +// CHECK-ENCODING: [0xe0,0x83,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f83e0 + diff --git a/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s b/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s new file mode 100644 index 0000000000000..730f3e7532335 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// 
--------------------------------------------------------------------------// +// Invalid lane indices + +luti2 z0.h, zt0, z0[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: luti2 z0.h, zt0, z0[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 z0.s, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: luti2 z0.s, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.b-z1.b}, zt0, z0[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti2 {z0.b-z1.b}, zt0, z0[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.h-z1.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti2 {z0.h-z1.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.s-z3.s}, zt0, z0[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti2 {z0.s-z3.s}, zt0, z0[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.b-z3.b}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: luti2 {z0.b-z3.b}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lists + +luti2 {z0.h-z2.h}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti2 {z0.h-z2.h}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z1.s-z2.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: luti2 {z1.s-z2.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z1.s-z4.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: luti2 {z1.s-z4.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +luti2 {z0.d-z1.d}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti2 {z0.d-z1.d}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/luti2.s b/llvm/test/MC/AArch64/SME2/luti2.s new file mode 100644 index 0000000000000..c622ed0a4285c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti2.s @@ -0,0 +1,238 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d 
--mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +luti2 z0.h, zt0, z0[0] // 11000000-11001100-00010000-00000000 +// CHECK-INST: luti2 z0.h, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x10,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc1000 + +luti2 z21.h, zt0, z10[5] // 11000000-11001101-01010001-01010101 +// CHECK-INST: luti2 z21.h, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x51,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd5155 + +luti2 z23.h, zt0, z13[3] // 11000000-11001100-11010001-10110111 +// CHECK-INST: luti2 z23.h, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xd1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ccd1b7 + +luti2 z31.h, zt0, z31[15] // 11000000-11001111-11010011-11111111 +// CHECK-INST: luti2 z31.h, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xd3,0xcf,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfd3ff + + +luti2 z0.s, zt0, z0[0] // 11000000-11001100-00100000-00000000 +// CHECK-INST: luti2 z0.s, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x20,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc2000 + +luti2 z21.s, zt0, z10[5] // 11000000-11001101-01100001-01010101 +// CHECK-INST: luti2 z21.s, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x61,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd6155 + +luti2 z23.s, zt0, z13[3] // 11000000-11001100-11100001-10110111 +// CHECK-INST: luti2 z23.s, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xe1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cce1b7 + +luti2 z31.s, zt0, z31[15] // 11000000-11001111-11100011-11111111 +// CHECK-INST: luti2 z31.s, zt0, z31[15] +// 
CHECK-ENCODING: [0xff,0xe3,0xcf,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfe3ff + + +luti2 z0.b, zt0, z0[0] // 11000000-11001100-00000000-00000000 +// CHECK-INST: luti2 z0.b, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x00,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc0000 + +luti2 z21.b, zt0, z10[5] // 11000000-11001101-01000001-01010101 +// CHECK-INST: luti2 z21.b, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x41,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd4155 + +luti2 z23.b, zt0, z13[3] // 11000000-11001100-11000001-10110111 +// CHECK-INST: luti2 z23.b, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xc1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ccc1b7 + +luti2 z31.b, zt0, z31[15] // 11000000-11001111-11000011-11111111 +// CHECK-INST: luti2 z31.b, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xc3,0xcf,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfc3ff + + +luti2 {z0.h - z1.h}, zt0, z0[0] // 11000000-10001100-01010000-00000000 +// CHECK-INST: luti2 { z0.h, z1.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x50,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c5000 + +luti2 {z20.h - z21.h}, zt0, z10[2] // 11000000-10001101-01010001-01010100 +// CHECK-INST: luti2 { z20.h, z21.h }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x51,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d5154 + +luti2 {z22.h - z23.h}, zt0, z13[1] // 11000000-10001100-11010001-10110110 +// CHECK-INST: luti2 { z22.h, z23.h }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xd1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08cd1b6 + +luti2 {z30.h - z31.h}, zt0, z31[7] // 11000000-10001111-11010011-11111110 +// CHECK-INST: luti2 { z30.h, z31.h }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xd3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fd3fe + + +luti2 {z0.s - z1.s}, zt0, 
z0[0] // 11000000-10001100-01100000-00000000 +// CHECK-INST: luti2 { z0.s, z1.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x60,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c6000 + +luti2 {z20.s - z21.s}, zt0, z10[2] // 11000000-10001101-01100001-01010100 +// CHECK-INST: luti2 { z20.s, z21.s }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x61,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d6154 + +luti2 {z22.s - z23.s}, zt0, z13[1] // 11000000-10001100-11100001-10110110 +// CHECK-INST: luti2 { z22.s, z23.s }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xe1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ce1b6 + +luti2 {z30.s - z31.s}, zt0, z31[7] // 11000000-10001111-11100011-11111110 +// CHECK-INST: luti2 { z30.s, z31.s }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xe3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fe3fe + + +luti2 {z0.b - z1.b}, zt0, z0[0] // 11000000-10001100-01000000-00000000 +// CHECK-INST: luti2 { z0.b, z1.b }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x40,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c4000 + +luti2 {z20.b - z21.b}, zt0, z10[2] // 11000000-10001101-01000001-01010100 +// CHECK-INST: luti2 { z20.b, z21.b }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x41,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d4154 + +luti2 {z22.b - z23.b}, zt0, z13[1] // 11000000-10001100-11000001-10110110 +// CHECK-INST: luti2 { z22.b, z23.b }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xc1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08cc1b6 + +luti2 {z30.b - z31.b}, zt0, z31[7] // 11000000-10001111-11000011-11111110 +// CHECK-INST: luti2 { z30.b, z31.b }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xc3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fc3fe + + +luti2 {z0.h - z3.h}, zt0, z0[0] // 11000000-10001100-10010000-00000000 +// CHECK-INST: 
luti2 { z0.h - z3.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x90,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c9000 + +luti2 {z20.h - z23.h}, zt0, z10[1] // 11000000-10001101-10010001-01010100 +// CHECK-INST: luti2 { z20.h - z23.h }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x91,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d9154 + +luti2 {z20.h - z23.h}, zt0, z13[0] // 11000000-10001100-10010001-10110100 +// CHECK-INST: luti2 { z20.h - z23.h }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x91,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c91b4 + +luti2 {z28.h - z31.h}, zt0, z31[3] // 11000000-10001111-10010011-11111100 +// CHECK-INST: luti2 { z28.h - z31.h }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0x93,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08f93fc + + +luti2 {z0.s - z3.s}, zt0, z0[0] // 11000000-10001100-10100000-00000000 +// CHECK-INST: luti2 { z0.s - z3.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0xa0,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ca000 + +luti2 {z20.s - z23.s}, zt0, z10[1] // 11000000-10001101-10100001-01010100 +// CHECK-INST: luti2 { z20.s - z23.s }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0xa1,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08da154 + +luti2 {z20.s - z23.s}, zt0, z13[0] // 11000000-10001100-10100001-10110100 +// CHECK-INST: luti2 { z20.s - z23.s }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0xa1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ca1b4 + +luti2 {z28.s - z31.s}, zt0, z31[3] // 11000000-10001111-10100011-11111100 +// CHECK-INST: luti2 { z28.s - z31.s }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0xa3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fa3fc + + +luti2 {z0.b - z3.b}, zt0, z0[0] // 11000000-10001100-10000000-00000000 +// CHECK-INST: luti2 { z0.b - z3.b }, zt0, z0[0] +// CHECK-ENCODING: 
[0x00,0x80,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c8000 + +luti2 {z20.b - z23.b}, zt0, z10[1] // 11000000-10001101-10000001-01010100 +// CHECK-INST: luti2 { z20.b - z23.b }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x81,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d8154 + +luti2 {z20.b - z23.b}, zt0, z13[0] // 11000000-10001100-10000001-10110100 +// CHECK-INST: luti2 { z20.b - z23.b }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x81,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c81b4 + +luti2 {z28.b - z31.b}, zt0, z31[3] // 11000000-10001111-10000011-11111100 +// CHECK-INST: luti2 { z28.b - z31.b }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0x83,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08f83fc + diff --git a/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s b/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s new file mode 100644 index 0000000000000..7a8590d0bac29 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid lane indices + +luti4 z0.h, zt0, z0[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti4 z0.h, zt0, z0[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 z0.s, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti4 z0.s, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.b-z1.b}, zt0, z0[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti4 {z0.b-z1.b}, zt0, z0[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.h-z1.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: luti4 {z0.h-z1.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.s-z3.s}, zt0, z0[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. +// CHECK-NEXT: luti4 {z0.s-z3.s}, zt0, z0[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.h-z3.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. +// CHECK-NEXT: luti4 {z0.h-z3.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lists + +luti4 {z0.h-z2.h}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti4 {z0.h-z2.h}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z1.s-z2.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: luti4 {z1.s-z2.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z1.s-z4.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: luti4 {z1.s-z4.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +luti4 {z0.d-z1.d}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti4 {z0.d-z1.d}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/luti4.s b/llvm/test/MC/AArch64/SME2/luti4.s new file mode 100644 index 0000000000000..c784d823eba45 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti4.s @@ -0,0 +1,213 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +luti4 z0.h, zt0, z0[0] // 11000000-11001010-00010000-00000000 +// CHECK-INST: luti4 z0.h, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x10,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca1000 + +luti4 z21.h, zt0, z10[5] // 11000000-11001011-01010001-01010101 +// CHECK-INST: luti4 z21.h, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x51,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb5155 + +luti4 z23.h, zt0, z13[3] // 11000000-11001010-11010001-10110111 +// CHECK-INST: luti4 z23.h, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xd1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cad1b7 + +luti4 z31.h, zt0, z31[7] // 11000000-11001011-11010011-11111111 +// CHECK-INST: luti4 z31.h, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xd3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbd3ff + + +luti4 z0.s, zt0, z0[0] // 11000000-11001010-00100000-00000000 +// CHECK-INST: luti4 z0.s, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x20,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca2000 + +luti4 z21.s, zt0, z10[5] // 11000000-11001011-01100001-01010101 +// CHECK-INST: luti4 z21.s, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x61,0xcb,0xc0] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb6155 + +luti4 z23.s, zt0, z13[3] // 11000000-11001010-11100001-10110111 +// CHECK-INST: luti4 z23.s, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xe1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cae1b7 + +luti4 z31.s, zt0, z31[7] // 11000000-11001011-11100011-11111111 +// CHECK-INST: luti4 z31.s, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xe3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbe3ff + + +luti4 z0.b, zt0, z0[0] // 11000000-11001010-00000000-00000000 +// CHECK-INST: luti4 z0.b, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x00,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca0000 + +luti4 z21.b, zt0, z10[5] // 11000000-11001011-01000001-01010101 +// CHECK-INST: luti4 z21.b, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x41,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb4155 + +luti4 z23.b, zt0, z13[3] // 11000000-11001010-11000001-10110111 +// CHECK-INST: luti4 z23.b, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xc1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cac1b7 + +luti4 z31.b, zt0, z31[7] // 11000000-11001011-11000011-11111111 +// CHECK-INST: luti4 z31.b, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xc3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbc3ff + + +luti4 {z0.h - z1.h}, zt0, z0[0] // 11000000-10001010-01010000-00000000 +// CHECK-INST: luti4 { z0.h, z1.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x50,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a5000 + +luti4 {z20.h - z21.h}, zt0, z10[2] // 11000000-10001011-01010001-01010100 +// CHECK-INST: luti4 { z20.h, z21.h }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x51,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b5154 + +luti4 {z22.h - z23.h}, zt0, z13[1] // 11000000-10001010-11010001-10110110 +// CHECK-INST: luti4 { z22.h, z23.h 
}, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xd1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ad1b6 + +luti4 {z30.h - z31.h}, zt0, z31[3] // 11000000-10001011-11010011-11111110 +// CHECK-INST: luti4 { z30.h, z31.h }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xd3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08bd3fe + + +luti4 {z0.s - z1.s}, zt0, z0[0] // 11000000-10001010-01100000-00000000 +// CHECK-INST: luti4 { z0.s, z1.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x60,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a6000 + +luti4 {z20.s - z21.s}, zt0, z10[2] // 11000000-10001011-01100001-01010100 +// CHECK-INST: luti4 { z20.s, z21.s }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x61,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b6154 + +luti4 {z22.s - z23.s}, zt0, z13[1] // 11000000-10001010-11100001-10110110 +// CHECK-INST: luti4 { z22.s, z23.s }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xe1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ae1b6 + +luti4 {z30.s - z31.s}, zt0, z31[3] // 11000000-10001011-11100011-11111110 +// CHECK-INST: luti4 { z30.s, z31.s }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xe3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08be3fe + + +luti4 {z0.b - z1.b}, zt0, z0[0] // 11000000-10001010-01000000-00000000 +// CHECK-INST: luti4 { z0.b, z1.b }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x40,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a4000 + +luti4 {z20.b - z21.b}, zt0, z10[2] // 11000000-10001011-01000001-01010100 +// CHECK-INST: luti4 { z20.b, z21.b }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x41,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b4154 + +luti4 {z22.b - z23.b}, zt0, z13[1] // 11000000-10001010-11000001-10110110 +// CHECK-INST: luti4 { z22.b, z23.b }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xc1,0x8a,0xc0] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ac1b6 + +luti4 {z30.b - z31.b}, zt0, z31[3] // 11000000-10001011-11000011-11111110 +// CHECK-INST: luti4 { z30.b, z31.b }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xc3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08bc3fe + + +luti4 {z0.h - z3.h}, zt0, z0[0] // 11000000-10001010-10010000-00000000 +// CHECK-INST: luti4 { z0.h - z3.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x90,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a9000 + +luti4 {z20.h - z23.h}, zt0, z10[1] // 11000000-10001011-10010001-01010100 +// CHECK-INST: luti4 { z20.h - z23.h }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x91,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b9154 + +luti4 {z20.h - z23.h}, zt0, z13[0] // 11000000-10001010-10010001-10110100 +// CHECK-INST: luti4 { z20.h - z23.h }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x91,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a91b4 + +luti4 {z28.h - z31.h}, zt0, z31[1] // 11000000-10001011-10010011-11111100 +// CHECK-INST: luti4 { z28.h - z31.h }, zt0, z31[1] +// CHECK-ENCODING: [0xfc,0x93,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b93fc + + +luti4 {z0.s - z3.s}, zt0, z0[0] // 11000000-10001010-10100000-00000000 +// CHECK-INST: luti4 { z0.s - z3.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0xa0,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08aa000 + +luti4 {z20.s - z23.s}, zt0, z10[1] // 11000000-10001011-10100001-01010100 +// CHECK-INST: luti4 { z20.s - z23.s }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0xa1,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ba154 + +luti4 {z20.s - z23.s}, zt0, z13[0] // 11000000-10001010-10100001-10110100 +// CHECK-INST: luti4 { z20.s - z23.s }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0xa1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c08aa1b4 + +luti4 {z28.s - z31.s}, zt0, z31[1] // 11000000-10001011-10100011-11111100 +// CHECK-INST: luti4 { z28.s - z31.s }, zt0, z31[1] +// CHECK-ENCODING: [0xfc,0xa3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ba3fc + diff --git a/llvm/test/MC/AArch64/SME2/movt-diagnostics.s b/llvm/test/MC/AArch64/SME2/movt-diagnostics.s new file mode 100644 index 0000000000000..d3696bf114623 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/movt-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +// index must be a multiple of 8 in range [0, 56]. +// --------------------------------------------------------------------------// + +movt x0, zt0[57] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[57] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[58] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[58] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[64] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[64] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[72] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. 
+// CHECK-NEXT: movt x0, zt0[72] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid zt0 register + +movt x0, zt1[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected token in argument list +// CHECK-NEXT: movt x0, zt1[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/movt.s b/llvm/test/MC/AArch64/SME2/movt.s new file mode 100644 index 0000000000000..a673eceeebed4 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/movt.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movt x0, zt0[0] // 11000000-01001100-00000011-11100000 +// CHECK-INST: movt x0, zt0[0] +// CHECK-ENCODING: [0xe0,0x03,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c03e0 + +movt x21, zt0[40] // 11000000-01001100-01010011-11110101 +// CHECK-INST: movt x21, zt0[40] +// CHECK-ENCODING: [0xf5,0x53,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c53f5 + +movt x23, zt0[48] // 11000000-01001100-01100011-11110111 +// CHECK-INST: movt x23, zt0[48] +// CHECK-ENCODING: [0xf7,0x63,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c63f7 + 
+movt xzr, zt0[56] // 11000000-01001100-01110011-11111111 +// CHECK-INST: movt xzr, zt0[56] +// CHECK-ENCODING: [0xff,0x73,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c73ff + + +movt zt0[0], x0 // 11000000-01001110-00000011-11100000 +// CHECK-INST: movt zt0[0], x0 +// CHECK-ENCODING: [0xe0,0x03,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e03e0 + +movt zt0[40], x21 // 11000000-01001110-01010011-11110101 +// CHECK-INST: movt zt0[40], x21 +// CHECK-ENCODING: [0xf5,0x53,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e53f5 + +movt zt0[48], x23 // 11000000-01001110-01100011-11110111 +// CHECK-INST: movt zt0[48], x23 +// CHECK-ENCODING: [0xf7,0x63,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e63f7 + +movt zt0[56], xzr // 11000000-01001110-01110011-11111111 +// CHECK-INST: movt zt0[56], xzr +// CHECK-ENCODING: [0xff,0x73,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e73ff + diff --git a/llvm/test/MC/AArch64/SME2/str-diagnostics.s b/llvm/test/MC/AArch64/SME2/str-diagnostics.s new file mode 100644 index 0000000000000..00659829fe616 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/str-diagnostics.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +str zt, [x0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid lookup table, expected zt0 +// CHECK-NEXT: str zt, [x0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/str.s b/llvm/test/MC/AArch64/SME2/str.s new file mode 100644 index 0000000000000..97abc4ce01cd6 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/str.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc 
-triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +str zt0, [x0] // 11100001-00111111-10000000-00000000 +// CHECK-INST: str zt0, [x0] +// CHECK-ENCODING: [0x00,0x80,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f8000 + +str zt0, [x10] // 11100001-00111111-10000001-01000000 +// CHECK-INST: str zt0, [x10] +// CHECK-ENCODING: [0x40,0x81,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f8140 + +str zt0, [x13] // 11100001-00111111-10000001-10100000 +// CHECK-INST: str zt0, [x13] +// CHECK-ENCODING: [0xa0,0x81,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f81a0 + +str zt0, [sp] // 11100001-00111111-10000011-11100000 +// CHECK-INST: str zt0, [sp] +// CHECK-ENCODING: [0xe0,0x83,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f83e0 + diff --git a/llvm/test/MC/AArch64/SME2/zero.s b/llvm/test/MC/AArch64/SME2/zero.s new file mode 100644 index 0000000000000..511aff8d57e8c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zero.s @@ -0,0 +1,20 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc 
-triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +zero {zt0} // 11000000-01001000-00000000-00000001 +// CHECK-INST: zero { zt0 } +// CHECK-ENCODING: [0x01,0x00,0x48,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0480001 + From fdab9f1203eea48a7b8e4c55c7ceafc54653797c Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Thu, 3 Nov 2022 12:26:49 +0700 Subject: [PATCH 112/516] [Clang] Check for response file existence prior to check for recursion Now that errors in file operations are handled, the check for file existence must be done prior to the check for recursion; otherwise the reported errors are misleading. Differential Revision: https://reviews.llvm.org/D136090 --- clang/test/Driver/Inputs/inc-inexistent.rsp | 1 + clang/test/Driver/response-file-errs.c | 15 +++++ clang/tools/driver/driver.cpp | 2 +- clang/unittests/Driver/ToolChainTest.cpp | 7 ++- llvm/lib/Support/CommandLine.cpp | 61 +++++++++++---------- 5 files changed, 52 insertions(+), 34 deletions(-) create mode 100644 clang/test/Driver/Inputs/inc-inexistent.rsp create mode 100644 clang/test/Driver/response-file-errs.c diff --git a/clang/test/Driver/Inputs/inc-inexistent.rsp b/clang/test/Driver/Inputs/inc-inexistent.rsp new file mode 100644 index 0000000000000..c9ecfdf88ddd0 --- /dev/null +++ b/clang/test/Driver/Inputs/inc-inexistent.rsp @@ -0,0 +1 @@ +@inexistent.txt diff --git a/clang/test/Driver/response-file-errs.c b/clang/test/Driver/response-file-errs.c new file mode 100644 index 0000000000000..c0e02a984b9af --- /dev/null +++ b/clang/test/Driver/response-file-errs.c @@ -0,0 +1,15 @@ +// If response file does not exist, '@file' directive
remains unexpanded in +// command line. +// +// RUN: %clang @%S/Inputs/inexistent.rsp -### 2>&1 | FileCheck --check-prefix=INEXISTENT %s +// INEXISTENT: @{{.*}}Inputs/inexistent.rsp + +// As the above case but '@file' is in response file. +// +// RUN: %clang @%S/Inputs/inc-inexistent.rsp -### 2>&1 | FileCheck --check-prefix=INEXISTENT2 %s +// INEXISTENT2: @{{.*}}inexistent.txt + +// If file in `@file` is a directory, it is an error. +// +// RUN: not %clang @%S/Inputs -### 2>&1 | FileCheck --check-prefix=DIRECTORY %s +// DIRECTORY: cannot not open file '{{.*}}Inputs': {{[Ii]}}s a directory diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index 2cc3b48609cb3..4b1a246d99430 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -378,7 +378,7 @@ int clang_main(int Argc, char **Argv) { llvm::cl::ExpansionContext ECtx(A, Tokenizer); ECtx.setMarkEOLs(MarkEOLs); if (llvm::Error Err = ECtx.expandResponseFiles(Args)) { - llvm::errs() << Err << '\n'; + llvm::errs() << toString(std::move(Err)) << '\n'; return 1; } diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index b143cd6329455..b45bab06d64b8 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -596,9 +596,10 @@ TEST(ToolChainTest, ConfigInexistentInclude) { ASSERT_TRUE(C); ASSERT_TRUE(C->containsError()); EXPECT_EQ(1U, DiagConsumer->Errors.size()); - EXPECT_STREQ("cannot read configuration file '" USERCONFIG - "': cannot not open file '" UNEXISTENT "'", - DiagConsumer->Errors[0].c_str()); + EXPECT_STRCASEEQ("cannot read configuration file '" USERCONFIG + "': cannot not open file '" UNEXISTENT + "': no such file or directory", + DiagConsumer->Errors[0].c_str()); } #undef USERCONFIG diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 136b813b1f6c8..fbaacbbbcf8a0 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ 
b/llvm/lib/Support/CommandLine.cpp @@ -1158,9 +1158,11 @@ Error ExpansionContext::expandResponseFile( assert(sys::path::is_absolute(FName)); llvm::ErrorOr> MemBufOrErr = FS->getBufferForFile(FName); - if (!MemBufOrErr) - return llvm::createStringError( - MemBufOrErr.getError(), Twine("cannot not open file '") + FName + "'"); + if (!MemBufOrErr) { + std::error_code EC = MemBufOrErr.getError(); + return llvm::createStringError(EC, Twine("cannot not open file '") + FName + + "': " + EC.message()); + } MemoryBuffer &MemBuf = *MemBufOrErr.get(); StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize()); @@ -1262,7 +1264,7 @@ Error ExpansionContext::expandResponseFiles( if (auto CWD = FS->getCurrentWorkingDirectory()) { CurrDir = *CWD; } else { - return make_error( + return createStringError( CWD.getError(), Twine("cannot get absolute path for: ") + FName); } } else { @@ -1271,49 +1273,48 @@ Error ExpansionContext::expandResponseFiles( llvm::sys::path::append(CurrDir, FName); FName = CurrDir.c_str(); } + + ErrorOr Res = FS->status(FName); + if (!Res || !Res->exists()) { + std::error_code EC = Res.getError(); + if (!InConfigFile) { + // If the specified file does not exist, leave '@file' unexpanded, as + // libiberty does. + if (!EC || EC == llvm::errc::no_such_file_or_directory) { + ++I; + continue; + } + } + if (!EC) + EC = llvm::errc::no_such_file_or_directory; + return createStringError(EC, Twine("cannot not open file '") + FName + + "': " + EC.message()); + } + const llvm::vfs::Status &FileStatus = Res.get(); + auto IsEquivalent = - [FName, this](const ResponseFileRecord &RFile) -> ErrorOr { - ErrorOr LHS = FS->status(FName); - if (!LHS) - return LHS.getError(); + [FileStatus, this](const ResponseFileRecord &RFile) -> ErrorOr { ErrorOr RHS = FS->status(RFile.File); if (!RHS) return RHS.getError(); - return LHS->equivalent(*RHS); + return FileStatus.equivalent(*RHS); }; // Check for recursive response files. 
for (const auto &F : drop_begin(FileStack)) { if (ErrorOr R = IsEquivalent(F)) { if (R.get()) - return make_error( - Twine("recursive expansion of: '") + F.File + "'", R.getError()); + return createStringError( + R.getError(), Twine("recursive expansion of: '") + F.File + "'"); } else { - return make_error(Twine("cannot open file: ") + F.File, - R.getError()); + return createStringError(R.getError(), + Twine("cannot open file: ") + F.File); } } // Replace this response file argument with the tokenization of its // contents. Nested response files are expanded in subsequent iterations. SmallVector ExpandedArgv; - if (!InConfigFile) { - // If the specified file does not exist, leave '@file' unexpanded, as - // libiberty does. - ErrorOr Res = FS->status(FName); - if (!Res) { - std::error_code EC = Res.getError(); - if (EC == llvm::errc::no_such_file_or_directory) { - ++I; - continue; - } - } else { - if (!Res->exists()) { - ++I; - continue; - } - } - } if (Error Err = expandResponseFile(FName, ExpandedArgv)) return Err; From 8cb9e3c3ce1e7e1658921f90420b68ca16bb98fc Mon Sep 17 00:00:00 2001 From: Peter Waller Date: Wed, 2 Nov 2022 10:19:34 +0000 Subject: [PATCH 113/516] [AArch64] Install arm_neon_sve_bridge.h arm_neon_sve_bridge.h is not generated, so the rules which ensure the generated files get copied into the installation prefix don't apply to this one. Add it to the aarch64_only_files set instead, which ensures it ends up both in the build directory and the installation directory. Tested with build targets `clang-resource-headers` and `install-clang-resource-headers`. 
Differential Revision: https://reviews.llvm.org/D137239 --- clang/lib/Headers/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index f69bf14891440..402b7374ca816 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -33,6 +33,7 @@ set(arm_only_files set(aarch64_only_files arm64intr.h + arm_neon_sve_bridge.h ) set(cuda_files @@ -326,10 +327,6 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD) clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h) # Generate arm_cde.h clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h) - # Copy arm_neon_sve_bridge.h - copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} - arm_neon_sve_bridge.h - ) # Add headers to target specific lists list(APPEND arm_common_generated_files @@ -345,7 +342,6 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD) list(APPEND aarch64_only_generated_files "${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h" "${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h" - "${output_dir}/arm_neon_sve_bridge.h" ) endif() if(RISCV IN_LIST LLVM_TARGETS_TO_BUILD) From e1790c8c290d773cd5b1fc79f80b7a23dceb7589 Mon Sep 17 00:00:00 2001 From: Peter Waller Date: Thu, 3 Nov 2022 07:56:03 +0000 Subject: [PATCH 114/516] Revert "[InstCombine] Remove redundant splats in InstCombineVectorOps" This reverts commit 957eed0b1af2cb88edafe1ff2643a38165c67a40. 
--- .../InstCombine/InstCombineInternal.h | 1 - .../InstCombine/InstCombineVectorOps.cpp | 32 +-- .../Transforms/InstCombine/shuffle-binop.ll | 60 ++--- .../AArch64/insert-shuffle-binop.ll | 216 ++++++++++++++++++ 4 files changed, 231 insertions(+), 78 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 11aed7754c264..3f1bcea3727f5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -167,7 +167,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); - Instruction *simplifyBinOpSplats(ShuffleVectorInst &SVI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); Instruction *visitLandingPadInst(LandingPadInst &LI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 6581fe0b9dc91..d50918629ba5c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2598,34 +2598,6 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { return new ShuffleVectorInst(X, Y, NewMask); } -// Splatting the first element of the result of a BinOp, where any of the -// BinOp's operands are the result of a first element splat can be simplified to -// splatting the first element of the result of the BinOp -Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) { - if (!SVI.isZeroEltSplat()) - return nullptr; - - Value *Op0 = SVI.getOperand(0); - Value *X, *Y; - if (!match(Op0, 
m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()), - m_Value(Y))) && - !match(Op0, m_BinOp(m_Value(X), - m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask())))) - return nullptr; - if (X->getType() != Y->getType()) - return nullptr; - - auto *BinOp = cast(Op0); - if (!isSafeToSpeculativelyExecute(BinOp)) - return nullptr; - - Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y); - if (auto NewBOI = dyn_cast(NewBO)) - NewBOI->copyIRFlags(BinOp); - - return new ShuffleVectorInst(NewBO, SVI.getShuffleMask()); -} - Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -2634,9 +2606,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SVI.getType(), ShufQuery)) return replaceInstUsesWith(SVI, V); - if (Instruction *I = simplifyBinOpSplats(SVI)) - return I; - + // Bail out for scalable vectors if (isa(LHS->getType())) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/shuffle-binop.ll b/llvm/test/Transforms/InstCombine/shuffle-binop.ll index c26c293c695af..fe2d1af5a04f3 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-binop.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-binop.ll @@ -50,13 +50,13 @@ define <4 x i8> @splat_binop_splat_x(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i8> [[X]], [[Y:%.*]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = add <4 x i8> [[XSPLAT]], [[Y:%.*]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x 
i8> %xsplat) - %b = add nsw <4 x i8> %xsplat, %y + %b = add <4 x i8> %xsplat, %y %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } @@ -65,14 +65,14 @@ define <4 x i8> @splat_binop_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i8> [[X:%.*]], [[Y]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = sub <4 x i8> [[X:%.*]], [[YSPLAT]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %ysplat) %b = sub <4 x i8> %x, %ysplat - %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> + %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } @@ -82,40 +82,21 @@ define <4 x i8> @splat_binop_splat_x_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i8> [[Y]], [[X]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[XSPLAT]], [[YSPLAT]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %xsplat) %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> 
zeroinitializer call void @use(<4 x i8> %ysplat) - %b = mul nuw <4 x i8> %xsplat, %ysplat + %b = mul <4 x i8> %xsplat, %ysplat %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } -define <4 x float> @splat_binop_splat_x_splat_y_fmath_flags(<4 x float> %x, <4 x float> %y) { -; CHECK-LABEL: @splat_binop_splat_x_splat_y_fmath_flags( -; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: call void @use(<4 x float> [[XSPLAT]]) -; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: call void @use(<4 x float> [[YSPLAT]]) -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[Y]], [[X]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[BSPLAT]] -; - %xsplat = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> zeroinitializer - call void @use(<4 x float> %xsplat) - %ysplat = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> zeroinitializer - call void @use(<4 x float> %ysplat) - %b = fmul fast <4 x float> %xsplat, %ysplat - %bsplat = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %bsplat -} - -define @vscale_splat_udiv_splat_x( %x, %y) { -; CHECK-LABEL: @vscale_splat_udiv_splat_x( +define @vscale_splat_binop_splat_x( %x, %y) { +; CHECK-LABEL: @vscale_splat_binop_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer ; CHECK-NEXT: [[B:%.*]] = udiv [[XSPLAT]], [[Y:%.*]] ; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer @@ -127,19 +108,6 @@ define @vscale_splat_udiv_splat_x( %x, %bsplat } -define @vscale_splat_urem_splat_x( %x, %y) { -; CHECK-LABEL: @vscale_splat_urem_splat_x( -; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer -; 
CHECK-NEXT: [[B:%.*]] = urem [[XSPLAT]], [[Y:%.*]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer -; CHECK-NEXT: ret [[BSPLAT]] -; - %xsplat = shufflevector %x, poison, zeroinitializer - %b = urem %xsplat, %y - %bsplat = shufflevector %b, poison, zeroinitializer - ret %bsplat -} - define @vscale_splat_binop_splat_y( %x, %y) { ; CHECK-LABEL: @vscale_splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer @@ -172,8 +140,8 @@ define @vscale_splat_binop_splat_x_splat_y_calls( [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer ; CHECK-NEXT: call void @use_v( [[YSPLAT]]) -; CHECK-NEXT: [[TMP1:%.*]] = lshr [[X]], [[Y]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; CHECK-NEXT: [[B:%.*]] = lshr [[XSPLAT]], [[YSPLAT]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer ; CHECK-NEXT: ret [[BSPLAT]] ; %xsplat = shufflevector %x, poison, zeroinitializer @@ -186,4 +154,4 @@ define @vscale_splat_binop_splat_x_splat_y_calls() -declare void @use_v() +declare void @use_v() \ No newline at end of file diff --git a/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll b/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll new file mode 100644 index 0000000000000..c75f53bc68583 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll @@ -0,0 +1,216 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='vector-combine' -S %s | FileCheck %s + +target triple = "aarch64-none-eabi" + +define @fadd_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fadd_vscale_insertelt_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; 
CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer +; CHECK-NEXT: ret [[TMP3]] +; + %broadcast.splatinsert = insertelement poison, float %0, i64 0 + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer + %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 + %r = fadd fast %broadcast.splatinsert2, %broadcast.splat + %3 = shufflevector %r, poison, zeroinitializer + ret %3 +} + +define <4 x float> @fadd_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fadd_fixed_insertelt_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; + %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 + %r = fadd fast <4 x float> %broadcast.splatinsert2, %broadcast.splat + %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %3 +} + +define @fsub_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fsub_vscale_insertelt_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = 
insertelement poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer +; CHECK-NEXT: ret [[TMP3]] +; + %broadcast.splatinsert = insertelement poison, float %0, i64 0 + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer + %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 + %r = fsub fast %broadcast.splatinsert2, %broadcast.splat + %3 = shufflevector %r, poison, zeroinitializer + ret %3 +} + +define <4 x float> @fsub_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fsub_fixed_insertelt_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; + %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 + %r = fsub fast <4 x float> %broadcast.splatinsert2, %broadcast.splat + %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %3 +} + +define 
@fadd_vscale_shuffle_insert_a_insert_b(float %0, float %1) { +; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer +; CHECK-NEXT: ret [[TMP3]] +; + %broadcast.splatinsert = insertelement poison, float %0, i64 0 + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer + %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 + %r = fadd fast %broadcast.splat, %broadcast.splatinsert2 + %3 = shufflevector %r, poison, zeroinitializer + ret %3 +} + +define <4 x float> @fadd_fixed_shuffle_insert_a_insert_b(float %0, float %1) { +; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; + %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 + %r = fadd fast <4 x float> %broadcast.splat, 
%broadcast.splatinsert2 + %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %3 +} + +define @fsub_vscale_shuffle_insert_a_insert_b(float %0, float %1) { +; CHECK-LABEL: @fsub_vscale_shuffle_insert_a_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer +; CHECK-NEXT: ret [[TMP3]] +; + %broadcast.splatinsert = insertelement poison, float %0, i64 0 + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer + %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 + %r = fsub fast %broadcast.splat, %broadcast.splatinsert2 + %3 = shufflevector %r, poison, zeroinitializer + ret %3 +} + +define <4 x float> @fsub_fixed_shuffle_insert_a_insert_b(float %0, float %1) { +; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; + %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> 
zeroinitializer + %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 + %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splatinsert2 + %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %3 +} + +define @fadd_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer +; CHECK-NEXT: ret [[TMP3]] +; + %broadcast.splatinsert = insertelement poison, float %0, i64 0 + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer + %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 + %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer + %r = fadd fast %broadcast.splat, %broadcast.splat2 + %3 = shufflevector %r, poison, zeroinitializer + ret %3 +} + +define <4 x float> @fadd_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> 
[[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; + %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 + %broadcast.splat2 = shufflevector <4 x float> %broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer + %r = fadd fast <4 x float> %broadcast.splat, %broadcast.splat2 + %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %3 +} + +define @fsub_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fsub_vscale_shuffle_insert_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer +; CHECK-NEXT: ret [[TMP3]] +; + %broadcast.splatinsert = insertelement poison, float %0, i64 0 + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer + %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 + %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer + %r = fsub fast %broadcast.splat, %broadcast.splat2 + %3 = shufflevector %r, poison, 
zeroinitializer + ret %3 +} + +define <4 x float> @fsub_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { +; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_shuffle_insert_b( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; + %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 + %broadcast.splat2 = shufflevector <4 x float> %broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer + %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splat2 + %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %3 +} From 020a9d7b20a2f405b6fd61be0d9f946da44c79af Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Fri, 21 Oct 2022 11:23:55 +0000 Subject: [PATCH 115/516] [GISel] Add (fsub +-0.0, X) -> fneg combine Allows for better matching of VOP3 mods. 
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D136442 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 + .../include/llvm/Target/GlobalISel/Combine.td | 10 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 32 ++ .../AMDGPU/GlobalISel/combine-fsub-fneg.mir | 387 ++++++++++++++++++ llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll | 77 ++-- llvm/test/CodeGen/AMDGPU/v_pack.ll | 4 +- 6 files changed, 468 insertions(+), 45 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 515382a8e869e..5c54f0e8ab058 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -696,6 +696,9 @@ class CombinerHelper { /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo); + void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo); + bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate = false); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 9f29e9faf385b..dd5d929e615c0 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -856,6 +856,13 @@ def redundant_neg_operands: GICombineRule< [{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; +// Transform (fsub +-0.0, X) -> (fneg X) +def fsub_to_fneg: GICombineRule< + (defs root:$root, register_matchinfo:$matchinfo), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchFsubToFneg(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyFsubToFneg(*${root}, ${matchinfo}); 
}])>; + // Transform (fadd x, (fmul y, z)) -> (fma y, z, x) // (fadd x, (fmul y, z)) -> (fmad y, z, x) // Transform (fadd (fmul x, y), z) -> (fma x, y, z) @@ -1056,7 +1063,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, - sub_add_reg, select_to_minmax, redundant_binop_in_equality]>; + sub_add_reg, select_to_minmax, redundant_binop_in_equality, + fsub_to_fneg]>; // A combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 1fea2607c061f..a233936ae9dae 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5190,6 +5190,38 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, return true; } +bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + Register LHS = MI.getOperand(1).getReg(); + MatchInfo = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + const auto LHSCst = Ty.isVector() + ? getFConstantSplat(LHS, MRI, /* allowUndef */ true) + : getFConstantVRegValWithLookThrough(LHS, MRI); + if (!LHSCst) + return false; + + // -0.0 is always allowed + if (LHSCst->Value.isNegZero()) + return true; + + // +0.0 is only allowed if nsz is set. 
+ if (LHSCst->Value.isPosZero()) + return MI.getFlag(MachineInstr::FmNsz); + + return false; +} + +void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + Register Dst = MI.getOperand(0).getReg(); + Builder.buildFNeg( + Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0)); + eraseInst(MI); +} + /// Checks if \p MI is TargetOpcode::G_FMUL and contractable either /// due to global flags or MachineInstr flags. static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir new file mode 100644 index 0000000000000..2bce205735299 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir @@ -0,0 +1,387 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_f16_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f16_poszero_nsz + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: %res:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %0:_(s32) = COPY $vgpr0 + %input:_(s16) = G_TRUNC %0 + %cst:_(s16) = G_FCONSTANT half 0.0 + %sub:_(s16) = nsz G_FSUB %cst, %input + %res:_(s32) = G_ANYEXT %sub + $vgpr0 = COPY %res +... 
+ +--- +name: test_f16_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f16_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: %cst:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: %sub:_(s16) = G_FSUB %cst, %input + ; CHECK-NEXT: %res:_(s32) = G_ANYEXT %sub(s16) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %0:_(s32) = COPY $vgpr0 + %input:_(s16) = G_TRUNC %0 + %cst:_(s16) = G_FCONSTANT half 0.0 + %sub:_(s16) = G_FSUB %cst, %input + %res:_(s32) = G_ANYEXT %sub + $vgpr0 = COPY %res +... + +--- +name: test_f16_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f16_negzero + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: %res:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %0:_(s32) = COPY $vgpr0 + %input:_(s16) = G_TRUNC %0 + %cst:_(s16) = G_FCONSTANT half -0.0 + %sub:_(s16) = G_FSUB %cst, %input + %res:_(s32) = G_ANYEXT %sub + $vgpr0 = COPY %res +... + +--- +name: test_f32_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f32_poszero_nsz + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) + %input:_(s32) = COPY $vgpr0 + %cst:_(s32) = G_FCONSTANT float 0.0 + %sub:_(s32) = nsz G_FSUB %cst, %input + $vgpr0 = COPY %sub +... 
+ +--- +name: test_f32_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f32_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: %sub:_(s32) = G_FSUB %cst, %input + ; CHECK-NEXT: $vgpr0 = COPY %sub(s32) + %input:_(s32) = COPY $vgpr0 + %cst:_(s32) = G_FCONSTANT float 0.0 + %sub:_(s32) = G_FSUB %cst, %input + $vgpr0 = COPY %sub +... + +--- +name: test_f32_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f32_negzero + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) + %input:_(s32) = COPY $vgpr0 + %cst:_(s32) = G_FCONSTANT float -0.0 + %sub:_(s32) = G_FSUB %cst, %input + $vgpr0 = COPY %sub +... + +--- +name: test_f64_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_f64_poszero_nsz + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](s64) + %input:_(s64) = COPY $vgpr0_vgpr1 + %cst:_(s64) = G_FCONSTANT double 0.0 + %sub:_(s64) = nsz G_FSUB %cst, %input + $vgpr0_vgpr1 = COPY %sub +... 
+ +--- +name: test_f64_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_f64_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %cst:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: %sub:_(s64) = G_FSUB %cst, %input + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %sub(s64) + %input:_(s64) = COPY $vgpr0_vgpr1 + %cst:_(s64) = G_FCONSTANT double 0.0 + %sub:_(s64) = G_FSUB %cst, %input + $vgpr0_vgpr1 = COPY %sub +... + +--- +name: test_f64_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_f64_negzero + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](s64) + %input:_(s64) = COPY $vgpr0_vgpr1 + %cst:_(s64) = G_FCONSTANT double -0.0 + %sub:_(s64) = G_FSUB %cst, %input + $vgpr0_vgpr1 = COPY %sub +... + +--- +name: test_v4f16_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_v4f16_poszero_nsz + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s16>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s16>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](<4 x s16>) + %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %cst:_(s16) = G_FCONSTANT half 0.0 + %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s16>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1 = COPY %sub +... 
+ +--- +name: test_v4f16_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_v4f16_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %cst:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst(s16), %cst(s16), %cst(s16), %cst(s16) + ; CHECK-NEXT: %sub:_(<4 x s16>) = G_FSUB %veccst, %input + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %sub(<4 x s16>) + %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %cst:_(s16) = G_FCONSTANT half 0.0 + %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s16>) = G_FSUB %veccst, %input + $vgpr0_vgpr1 = COPY %sub +... + +--- +name: test_v4f16_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_v4f16_negzero + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s16>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s16>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](<4 x s16>) + %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %cst:_(s16) = G_FCONSTANT half -0.0 + %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s16>) = G_FSUB %veccst, %input + $vgpr0_vgpr1 = COPY %sub +... 
+ +--- +name: test_v4f32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float 0.0 + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s32>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... + +--- +name: test_v4f32_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32_negzero + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float -0.0 + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s32>) = G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... 
+ +--- +name: test_v4f32_negzero_undef_elt +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32_negzero_undef_elt + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float -0.0 + %undef:_(s32) = G_IMPLICIT_DEF + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %undef, %cst, %cst + %sub:_(<4 x s32>) = G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... + +--- +name: test_v4f32_poszero_undef_elt +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32_poszero_undef_elt + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float 0.0 + %undef:_(s32) = G_IMPLICIT_DEF + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %undef, %cst, %cst + %sub:_(<4 x s32>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... 
+ +--- +name: test_v2f64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v2f64 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s64>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<2 x s64>) + %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s64) = G_FCONSTANT double 0.0 + %veccst:_(<2 x s64>) = G_BUILD_VECTOR %cst, %cst + %sub:_(<2 x s64>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... + +--- +name: test_v2f64_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v2f64_negzero + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s64>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<2 x s64>) + %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s64) = G_FCONSTANT double -0.0 + %veccst:_(<2 x s64>) = G_BUILD_VECTOR %cst, %cst + %sub:_(<2 x s64>) = G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll index 2e9a66c579cbe..92961ab1c4dda 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll @@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 +; SI-NEXT: v_mul_f32_e32 v2, -1.0, v2 ; SI-NEXT: v_med3_f32 v2, v2, v3, v4 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -56,7 +56,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7 +; VI-NEXT: v_mul_f32_e32 v4, -1.0, v7 ; VI-NEXT: v_med3_f32 v2, v4, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -72,7 +72,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm @@ -88,7 +88,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX10-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm @@ -104,7 +104,7 @@ define amdgpu_kernel void 
@v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -145,7 +145,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 +; SI-NEXT: v_mul_f32_e32 v2, -1.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_min_f32_e32 v5, v2, v3 ; SI-NEXT: v_max_f32_e32 v2, v2, v3 @@ -183,7 +183,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7 +; VI-NEXT: v_mul_f32_e32 v4, -1.0, v7 ; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; VI-NEXT: v_min_f32_e32 v5, v4, v2 ; VI-NEXT: v_max_f32_e32 v2, v4, v2 @@ -204,7 +204,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 ; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 @@ -225,7 +225,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 
; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 ; GFX10-NEXT: v_max_f32_e32 v4, v1, v2 @@ -246,7 +246,8 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_sub_f32 v1, 0x80000000, v1 :: v_dual_max_f32 v2, v2, v2 +; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-NEXT: v_max_f32_e32 v2, v2, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_f32_e32 v4, v1, v2 ; GFX11-NEXT: v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3 @@ -289,9 +290,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_mov_b32 s2, 0x80000000 -; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 -; SI-NEXT: v_sub_f32_e64 v4, s2, |v4| +; SI-NEXT: v_mul_f32_e32 v2, -1.0, v2 +; SI-NEXT: v_mul_f32_e64 v4, -1.0, |v4| ; SI-NEXT: v_med3_f32 v2, v2, |v3|, v4 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -320,13 +320,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: flat_load_dword v3, v[4:5] glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s2, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7 -; VI-NEXT: v_sub_f32_e64 v3, s2, |v3| +; VI-NEXT: v_mul_f32_e32 v4, -1.0, v7 +; VI-NEXT: v_mul_f32_e64 v3, -1.0, |v3| ; VI-NEXT: v_med3_f32 v2, v4, |v2|, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -342,9 +341,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX9-NEXT: s_waitcnt 
vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s2, 0x80000000 -; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 -; GFX9-NEXT: v_sub_f32_e64 v3, s2, |v3| +; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX9-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX9-NEXT: v_med3_f32 v1, v1, |v2|, v3 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm @@ -360,8 +358,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 -; GFX10-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX10-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX10-NEXT: v_med3_f32 v1, v1, |v2|, v3 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm @@ -377,8 +375,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 -; GFX11-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_med3_f32 v1, v1, |v2|, v3 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -425,10 +423,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_mov_b32 s2, 0x80000000 -; SI-NEXT: v_sub_f32_e64 v2, s2, |v2| -; SI-NEXT: v_sub_f32_e64 v3, s2, |v3| -; SI-NEXT: v_sub_f32_e64 v4, s2, |v4| +; SI-NEXT: v_mul_f32_e64 v2, -1.0, |v2| +; SI-NEXT: v_mul_f32_e64 v3, -1.0, |v3| +; SI-NEXT: v_mul_f32_e64 v4, -1.0, |v4| ; SI-NEXT: 
v_med3_f32 v2, v2, v3, v4 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -457,14 +454,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: flat_load_dword v3, v[4:5] glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s2, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e64 v4, s2, |v7| -; VI-NEXT: v_sub_f32_e64 v2, s2, |v2| -; VI-NEXT: v_sub_f32_e64 v3, s2, |v3| +; VI-NEXT: v_mul_f32_e64 v4, -1.0, |v7| +; VI-NEXT: v_mul_f32_e64 v2, -1.0, |v2| +; VI-NEXT: v_mul_f32_e64 v3, -1.0, |v3| ; VI-NEXT: v_med3_f32 v2, v4, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -480,10 +476,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s2, 0x80000000 -; GFX9-NEXT: v_sub_f32_e64 v1, s2, |v1| -; GFX9-NEXT: v_sub_f32_e64 v2, s2, |v2| -; GFX9-NEXT: v_sub_f32_e64 v3, s2, |v3| +; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX9-NEXT: v_max_f32_e64 v2, -|v2|, -|v2| +; GFX9-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm @@ -499,9 +494,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e64 v1, 0x80000000, |v1| -; GFX10-NEXT: v_sub_f32_e64 v2, 0x80000000, |v2| -; GFX10-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX10-NEXT: v_max_f32_e64 v2, -|v2|, -|v2| +; GFX10-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX10-NEXT: 
v_med3_f32 v1, v1, v2, v3 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm @@ -517,9 +512,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_sub_f32_e64 v1, 0x80000000, |v1| -; GFX11-NEXT: v_sub_f32_e64 v2, 0x80000000, |v2| -; GFX11-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX11-NEXT: v_max_f32_e64 v2, -|v2|, -|v2| +; GFX11-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/v_pack.ll b/llvm/test/CodeGen/AMDGPU/v_pack.ll index e17d38cff6332..1fbf9593aceea 100644 --- a/llvm/test/CodeGen/AMDGPU/v_pack.ll +++ b/llvm/test/CodeGen/AMDGPU/v_pack.ll @@ -223,9 +223,7 @@ define amdgpu_kernel void @v_pack_b32.fneg(half addrspace(1)* %in0, half addrspa ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: v_add_f16_e32 v0, 2.0, v1 ; GISEL-NEXT: v_add_f16_e32 v1, 2.0, v2 -; GISEL-NEXT: v_sub_f16_e32 v0, 0x8000, v0 -; GISEL-NEXT: v_sub_f16_e32 v1, 0x8000, v1 -; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GISEL-NEXT: v_pack_b32_f16 v0, -v0, -v1 ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; use v0 ; GISEL-NEXT: ;;#ASMEND From 9135137718bbb8322dc42a1026ce3f843bbeacc6 Mon Sep 17 00:00:00 2001 From: Jannik Silvanus Date: Wed, 2 Nov 2022 16:51:01 +0100 Subject: [PATCH 116/516] [llvm-diff] Precommit: Add loop test case with forward reference Diffing phi nodes was recently added to llvm-diff. However, there currently is a limitation where equivalent values cannot be detected as such, leading to false positive diff reports. 
If a phi node refers to a value defined in a basic block dominated by the current basic block, for example a phi node in a loop header referring to a value defined in the loop body, we cannot prove equivalence of the referenced values, because the basic block containing the variable definition has not yet been processed. This commit adds a test case showing this behavior, serving as a precommit for an upcoming fix of the above. Differential Revision: https://reviews.llvm.org/D137262 --- llvm/test/tools/llvm-diff/loop.ll | 49 +++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 llvm/test/tools/llvm-diff/loop.ll diff --git a/llvm/test/tools/llvm-diff/loop.ll b/llvm/test/tools/llvm-diff/loop.ll new file mode 100644 index 0000000000000..8c50bc616d5cf --- /dev/null +++ b/llvm/test/tools/llvm-diff/loop.ll @@ -0,0 +1,49 @@ +; Diff file with itself +; Due to a current limitation in llvm-diff, a diff is reported here. +; RUN: not llvm-diff %s %s 2>&1 | FileCheck --check-prefix=SAME-FILE %s + +; Replace %newvar1 with %newvar2 in the phi node. This can only +; be detected to be different once BB1 has been processed.
+; RUN: rm -f %t.ll +; RUN: cat %s | sed -e 's/ %newvar1, %BB1 / %newvar2, %BB1 /' > %t.ll +; RUN: not llvm-diff %s %t.ll 2>&1 | FileCheck --check-prefix DIFFERENT-VAR %s + +; SAME-FILE: in function func: +; SAME-FILE-NEXT: in block %BB0: +; SAME-FILE-NEXT: > %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] +; SAME-FILE-NEXT: > %cnd = icmp eq i32 %var, 0 +; SAME-FILE-NEXT: > br i1 %cnd, label %BB1, label %END +; SAME-FILE-NEXT: < %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] +; SAME-FILE-NEXT: < %cnd = icmp eq i32 %var, 0 +; SAME-FILE-NEXT: < br i1 %cnd, label %BB1, label %END + +; DIFFERENT-VAR: in function func: +; DIFFERENT-VAR-NEXT: in block %BB0: +; DIFFERENT-VAR-NEXT: > %var = phi i32 [ 0, %ENTRY ], [ %newvar2, %BB1 ] +; DIFFERENT-VAR-NEXT: > %cnd = icmp eq i32 %var, 0 +; DIFFERENT-VAR-NEXT: > br i1 %cnd, label %BB1, label %END +; DIFFERENT-VAR-NEXT: < %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] +; DIFFERENT-VAR-NEXT: < %cnd = icmp eq i32 %var, 0 +; DIFFERENT-VAR-NEXT: < br i1 %cnd, label %BB1, label %END +define i32 @func() { +ENTRY: + br label %BB0 + +BB0: + ; When diffing this phi node, we need to detect whether + ; %newvar1 is equivalent, which is not known until BB1 has been processed. + %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] + %cnd = icmp eq i32 %var, 0 + br i1 %cnd, label %BB1, label %END + +BB1: + %newvar1 = add i32 %var, 1 + %newvar2 = add i32 %var, 2 + br label %BB0 + +END: + ; Equivalence of the ret depends on equivalence of %var. + ; Even if %var differs, we do not report a diff here, because + ; this is an indirect diff caused by another diff. 
+ ret i32 %var +} From 4ecb2b8ef6be69b55d46ac274f3b7a7103219f98 Mon Sep 17 00:00:00 2001 From: Kristina Bessonova Date: Thu, 3 Nov 2022 10:27:10 +0200 Subject: [PATCH 117/516] [DebugInfo][Metadata] Make AllEnumTypes holding TrackingMDNodeRef Making AllEnumTypes a vector of TrackingMDNodeRef makes it possible to reflect changes in metadata in the vector if they take place before the DIBuilder is finalized. Otherwise, we end up with a heap-use-after-free because AllEnumTypes contains metadata that is no longer valid. Consider a case where we have a class containing a definition of an enum, so this enum has the class as a scope. For some reason (it doesn't matter for the current issue), we create temporary debug metadata for this class, and then resolve it while finalizing CGDebugInfo. In the case of a collision while uniquifying the temporary, we then need to replace its uses with a new pointer. If a temporary's user is unique (this is the enum mentioned above), we may need to re-uniquify it, which may return a new pointer in the case of another collision. If so, the pointer we stored in the AllEnumTypes vector becomes dangling. Making AllEnumTypes hold TrackingMDNodeRef should solve this problem (see the debug-info-enum-metadata-collision.cpp test for details).
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D137067 --- .../debug-info-enum-metadata-collision.cpp | 25 +++++++++++++++++++ llvm/include/llvm/IR/DIBuilder.h | 2 +- llvm/lib/IR/DIBuilder.cpp | 6 +++-- 3 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGenCXX/debug-info-enum-metadata-collision.cpp diff --git a/clang/test/CodeGenCXX/debug-info-enum-metadata-collision.cpp b/clang/test/CodeGenCXX/debug-info-enum-metadata-collision.cpp new file mode 100644 index 0000000000000..dd27acd0a77c5 --- /dev/null +++ b/clang/test/CodeGenCXX/debug-info-enum-metadata-collision.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -debug-info-kind=constructor %s -o - | FileCheck %s + +// Test that clang doesn't crash while resolving temporary debug metadata of +// a record with collisions in the record's enum users. + +// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, +// CHECK-SAME: scope: [[SCOPE:![0-9]+]] +// CHECK-SAME: elements: [[ELEMENTS:![0-9]+]] +// CHECK: [[SCOPE]] = !DICompositeType(tag: DW_TAG_structure_type +// CHECK-SAME: name: "Struct1" +// CHECK: [[ELEMENTS]] = !{[[ELEMENT:![0-9]+]]} +// CHECK: [[ELEMENT]] = !DIEnumerator(name: "enumValue1" + +template struct Struct1 { + enum { enumValue1 }; + Struct1(); +}; +void function2() { + struct Struct3 {}; + int i = Struct1::enumValue1; +} +void function3() { + struct Struct3 {}; + int i = Struct1::enumValue1; +} diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index dfb65ce341296..61fa4d8f3b9fd 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -48,7 +48,7 @@ namespace llvm { Function *LabelFn; ///< llvm.dbg.label Function *AddrFn; ///< llvm.dbg.addr - SmallVector AllEnumTypes; + SmallVector AllEnumTypes; /// Track the RetainTypes, since they can be updated later on. 
SmallVector AllRetainTypes; SmallVector AllSubprograms; diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index fada07ac383ae..76d7ade09a88c 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -84,7 +84,9 @@ void DIBuilder::finalize() { } if (!AllEnumTypes.empty()) - CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes)); + CUNode->replaceEnumTypes(MDTuple::get( + VMContext, SmallVector(AllEnumTypes.begin(), + AllEnumTypes.end()))); SmallVector RetainValues; // Declarations and definitions of the same type may be retained. Some @@ -556,7 +558,7 @@ DICompositeType *DIBuilder::createEnumerationType( getNonCompileUnitScope(Scope), UnderlyingType, SizeInBits, AlignInBits, 0, IsScoped ? DINode::FlagEnumClass : DINode::FlagZero, Elements, 0, nullptr, nullptr, UniqueIdentifier); - AllEnumTypes.push_back(CTy); + AllEnumTypes.emplace_back(CTy); trackIfUnresolved(CTy); return CTy; } From 73d2a4cfd82a8aa62c9ae1bf754dec920befb06d Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Thu, 3 Nov 2022 08:30:09 +0000 Subject: [PATCH 118/516] [AArch64] SME2 -Fix failing buildbots because of warning This patch is to solve this: https://lab.llvm.org/buildbot#builders/36/builds/26801 Created by this patch: a20112a74cb34f [AArch64]SME2 instructions that use ZTO operand --- llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 578c5140fd30b..e04c054205050 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2829,6 +2829,8 @@ unsigned AArch64AsmParser::getNumRegsForRegKind(RegKind K) { case RegKind::SVEPredicateVector: case RegKind::SVEPredicateAsCounter: return 16; + case RegKind::LookupTable: + return 512; } llvm_unreachable("Unsupported RegKind"); } @@ -4344,7 +4346,7 @@ 
OperandMatchResultTy AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) { SMLoc StartLoc = getLoc(); const AsmToken &Tok = getTok(); - StringRef Name = Tok.getString().lower(); + std::string Name = Tok.getString().lower(); unsigned RegNum = matchRegisterNameAlias(Name, RegKind::LookupTable); From ac7542bd3c28890545463f30a297c4c4defe5590 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 09:56:29 +0100 Subject: [PATCH 119/516] [CVP] Add vector icmp test (NFC) --- .../CorrelatedValuePropagation/icmp.ll | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll index cb3914742f9d7..4a3d9c752f04e 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -16,10 +16,10 @@ define void @test1(i64 %tmp35) { ; CHECK-NEXT: [[TMP36:%.*]] = icmp sgt i64 [[TMP35:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP36]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] ; CHECK: bb_true: -; CHECK-NEXT: tail call void @check1(i1 false) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: tail call void @check1(i1 false) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: bb_false: -; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR1]] +; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR2]] ; CHECK-NEXT: unreachable ; bb: @@ -55,7 +55,7 @@ define void @test2(i64 %tmp35, i1 %inner_cmp) { ; CHECK-NEXT: tail call void @check1(i1 false) ; CHECK-NEXT: unreachable ; CHECK: bb_false: -; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR1]] +; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR2]] ; CHECK-NEXT: unreachable ; bb: @@ -1216,5 +1216,35 @@ else: ret void } +define i1 @non_const_range_minmax(i8 %a, i8 %b) { +; CHECK-LABEL: @non_const_range_minmax( +; CHECK-NEXT: [[A2:%.*]] = call i8 @llvm.umin.i8(i8 [[A:%.*]], i8 10) +; CHECK-NEXT: [[B2:%.*]] = call i8 @llvm.umax.i8(i8 
[[B:%.*]], i8 11) +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 [[A2]], [[B2]] +; CHECK-NEXT: ret i1 [[CMP1]] +; + %a2 = call i8 @llvm.umin.i8(i8 %a, i8 10) + %b2 = call i8 @llvm.umax.i8(i8 %b, i8 11) + %cmp1 = icmp ult i8 %a2, %b2 + ret i1 %cmp1 +} + +define <2 x i1> @non_const_range_minmax_vec(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @non_const_range_minmax_vec( +; CHECK-NEXT: [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> ) +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[A2]], [[B2]] +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %a2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %a, <2 x i8> ) + %b2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %b, <2 x i8> ) + %cmp1 = icmp ult <2 x i8> %a2, %b2 + ret <2 x i1> %cmp1 +} + +declare i8 @llvm.umin.i8(i8, i8) +declare i8 @llvm.umax.i8(i8, i8) +declare <2 x i8> @llvm.umin.v2i8(<2 x i8>, <2 x i8>) +declare <2 x i8> @llvm.umax.v2i8(<2 x i8>, <2 x i8>) attributes #4 = { noreturn } From 78466a9f8730fee1feabcdae92576f77df6e775d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 10:15:10 +0100 Subject: [PATCH 120/516] [InstCombine] Add tests for memset -> load forwarding (NFC) We currently only forward store -> load, but could do the same for memset as well. 
--- .../InstCombine/load-store-forward.ll | 79 ++++++++++++++++--- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index 767c370e2614a..bfdbbf6bdb0e2 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -104,8 +104,8 @@ define i32 @load_i32_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_i32_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -156,8 +156,8 @@ define float @load_f32_store_nxv4f32(ptr %a) { ; CHECK-LABEL: @load_f32_store_nxv4f32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, float 1.000000e+00, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[A]], align 4 -; CHECK-NEXT: ret float [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: ret float [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, float 1.0, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -182,8 +182,8 @@ define <4 x i32> @load_v4i32_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_v4i32_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[A]], align 16 -; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: ret <4 x i32> [[TMP0]] ; 
entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -195,8 +195,8 @@ define <4 x i16> @load_v4i16_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_v4i16_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[A]], align 16 -; CHECK-NEXT: ret <4 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: ret <4 x i16> [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -224,8 +224,8 @@ define @load_nxv4i8_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_nxv4i8_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[A]], align 16 -; CHECK-NEXT: ret [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A]], align 16 +; CHECK-NEXT: ret [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -253,3 +253,62 @@ define i1 @load_i1_store_i8(ptr %a) { %v = load i1, ptr %a ret i1 %v } + +define i32 @load_after_memset_0(ptr %a) { +; CHECK-LABEL: @load_after_memset_0( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +define i32 @load_after_memset_1(ptr %a) { +; CHECK-LABEL: @load_after_memset_1( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; 
CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +define i32 @load_after_memset_unknown(ptr %a, i8 %byte) { +; CHECK-LABEL: @load_after_memset_unknown( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 [[BYTE:%.*]], i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 %byte, i64 16, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +define i32 @load_after_memset_0_clobber(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_clobber( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, ptr [[A]], align 1 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + store i8 1, ptr %a + %v = load i32, ptr %a + ret i32 %v +} + +define i256 @load_after_memset_0_too_small(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_too_small( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i256, ptr [[A]], align 4 +; CHECK-NEXT: ret i256 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i256, ptr %a + ret i256 %v +} + +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) From a71b408aefdbfd903acc88a38f887780e96f9336 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 10:20:38 +0100 Subject: [PATCH 121/516] [InstCombine] Add tests for memset -> load forward with offset (NFC) --- .../InstCombine/load-store-forward.ll | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index 
bfdbbf6bdb0e2..1d51e1c3c18a1 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -287,6 +287,45 @@ define i32 @load_after_memset_unknown(ptr %a, i8 %byte) { ret i32 %v } +define i32 @load_after_memset_0_offset(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_offset( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %gep = getelementptr i8, ptr %a, i64 4 + %v = load i32, ptr %gep + ret i32 %v +} + +define i32 @load_after_memset_0_offset_too_large(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_offset_too_large( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 13 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %gep = getelementptr i8, ptr %a, i64 13 + %v = load i32, ptr %gep + ret i32 %v +} + +define i32 @load_after_memset_0_offset_negative(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_offset_negative( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 -1 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %gep = getelementptr i8, ptr %a, i64 -1 + %v = load i32, ptr %gep + ret i32 %v +} + define i32 @load_after_memset_0_clobber(ptr %a) { ; CHECK-LABEL: @load_after_memset_0_clobber( ; CHECK-NEXT: call void 
@llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) From fa8aeab606c1ca2756bf8b6a451998f20671ce52 Mon Sep 17 00:00:00 2001 From: Simi Pallipurath Date: Fri, 28 Oct 2022 16:06:00 +0100 Subject: [PATCH 122/516] [AArch64] Add support for the Cortex-A715 CPU Cortex-A715 is an Armv9-A AArch64 CPU. This patch introduces support for Cortex-A715. Technical Reference Manual: https://developer.arm.com/documentation/101590/latest. Reviewed By: vhscampos Differential Revision: https://reviews.llvm.org/D136957 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/test/Driver/aarch64-mcpu.c | 2 ++ clang/test/Misc/target-invalid-cpu-note.c | 4 ++-- llvm/docs/ReleaseNotes.rst | 2 ++ llvm/include/llvm/Support/AArch64TargetParser.def | 6 ++++++ llvm/lib/Support/Host.cpp | 1 + llvm/lib/Target/AArch64/AArch64.td | 14 ++++++++++++++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 + llvm/lib/Target/AArch64/AArch64Subtarget.h | 1 + llvm/unittests/Support/TargetParserTest.cpp | 15 ++++++++++++++- 10 files changed, 46 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 435d9ded7c72e..763f4cece4634 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -733,6 +733,9 @@ Arm and AArch64 Support in Clang them, which it cannot. - Add driver and tuning support for Neoverse V2 via the flag ``-mcpu=neoverse-v2``. Native detection is also supported via ``-mcpu=native``. +- Support has been added for the following processors (-mcpu identifiers in parenthesis): + + * Arm Cortex-A715 (cortex-a715). 
Floating Point Support in Clang ------------------------------- diff --git a/clang/test/Driver/aarch64-mcpu.c b/clang/test/Driver/aarch64-mcpu.c index 0433f6a5b3d3f..b40c579acdf00 100644 --- a/clang/test/Driver/aarch64-mcpu.c +++ b/clang/test/Driver/aarch64-mcpu.c @@ -45,6 +45,8 @@ // CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78" // RUN: %clang -target aarch64 -mcpu=cortex-a78c -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A78C %s // CORTEX-A78C: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78c" +// RUN: %clang -target aarch64 -mcpu=cortex-a715 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A715 %s +// CORTEX-A715: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a715" // RUN: %clang -target aarch64 -mcpu=neoverse-e1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-E1 %s // NEOVERSE-E1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-e1" // RUN: %clang -target aarch64 -mcpu=neoverse-v1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-V1 %s diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index f2071c866956f..c0b542086a752 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -5,11 +5,11 @@ // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 // AARCH64: error: unknown target CPU 'not-a-cpu' -// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, 
apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} +// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' -// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} +// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, 
cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 6f0a64fd43468..ba9213557e935 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -81,6 +81,8 @@ Changes to TableGen Changes to the AArch64 Backend ------------------------------ +* Added support for the Cortex-A715 CPU. 
+ Changes to the AMDGPU Backend ----------------------------- diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 2005638e38c3a..2f83d0656c4dc 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -204,6 +204,12 @@ AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16)) +AARCH64_CPU_NAME("cortex-a715", ARMV9A, FK_NEON_FP_ARMV8, false, + (AArch64::AEK_SB | AArch64::AEK_SSBS | AArch64::AEK_MTE | + AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_PAUTH | + AArch64::AEK_I8MM | AArch64::AEK_PREDRES | AArch64::AEK_PERFMON | + AArch64::AEK_PROFILE | AArch64::AEK_SVE | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_BF16 | AArch64::AEK_FLAGM)) AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_LSE)) AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 70cae1e221b2b..d4495b387bde6 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -213,6 +213,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xd0b", "cortex-a76") .Case("0xd0d", "cortex-a77") .Case("0xd41", "cortex-a78") + .Case("0xd4d", "cortex-a715") .Case("0xd44", "cortex-x1") .Case("0xd4c", "cortex-x1c") .Case("0xd0c", "neoverse-n1") diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 03759f5911a3a..48c4b78b65975 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -764,6 +764,14 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", FeatureLSLFast, FeaturePostRAScheduler]>; +def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", + 
"Cortex-A715 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion, + FeatureLSLFast, + FeatureFuseAdrpAdd]>; + def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", "CortexR82", "Cortex-R82 ARM processors", [ @@ -1093,6 +1101,10 @@ def ProcessorFeatures { list A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, FeatureETE, FeatureMTE, FeatureFP16FML, FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; + list A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE, + FeatureFP16FML, FeatureSVE, FeatureTRBE, + FeatureSVE2BitPerm, FeatureBF16, FeatureETE, + FeaturePerfMon, FeatureMatMulInt8, FeatureSPE]; list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSSBS, FeaturePredRes, FeatureSB]; @@ -1231,6 +1243,8 @@ def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C, [TuneA78C]>; def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, [TuneA710]>; +def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715, + [TuneA715]>; def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, [TuneR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 5b3b6c00ed216..4acf8a1bf8603 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -142,6 +142,7 @@ void AArch64Subtarget::initializeProperties() { MaxBytesForLoopAlignment = 8; break; case CortexA710: + case CortexA715: case CortexX2: PrefFunctionLogAlignment = 4; VScaleForTuning = 1; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 15c3961087d1c..4718a01ad2166 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -64,6 +64,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { CortexA78, 
CortexA78C, CortexA710, + CortexA715, CortexR82, CortexX1, CortexX1C, diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 805084ac1f295..951d4f371562a 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -1016,6 +1016,19 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_FLAGM | AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_BF16, "9-A"), + ARMCPUTestParams("cortex-a715", "armv9-a", "neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_BF16 | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_RDM | + AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_MTE | AArch64::AEK_PAUTH | + AArch64::AEK_SVE | AArch64::AEK_SVE2 | + AArch64::AEK_SVE2BITPERM | AArch64::AEK_SSBS | + AArch64::AEK_SB | AArch64::AEK_I8MM | + AArch64::AEK_PERFMON | AArch64::AEK_PREDRES | + AArch64::AEK_PROFILE | AArch64::AEK_FP16FML | + AArch64::AEK_FP16 | AArch64::AEK_FLAGM, + "9-A"), ARMCPUTestParams( "neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | @@ -1296,7 +1309,7 @@ INSTANTIATE_TEST_SUITE_P( "8.2-A"))); // Note: number of CPUs includes aliases. 
-static constexpr unsigned NumAArch64CPUArchs = 59; +static constexpr unsigned NumAArch64CPUArchs = 60; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector List; From 5945ab10c36a908e3cb540f80a7c97dcaf19ec5b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 10:55:30 +0100 Subject: [PATCH 123/516] [InstCombine] Add more memset->load forwarding tests (NFC) --- .../InstCombine/load-store-forward.ll | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index 1d51e1c3c18a1..d90af935c65e5 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -265,6 +265,39 @@ define i32 @load_after_memset_0(ptr %a) { ret i32 %v } +define float @load_after_memset_0_float(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_float( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: ret float [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load float, ptr %a + ret float %v +} + +define i27 @load_after_memset_0_non_byte_sized(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_non_byte_sized( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i27, ptr [[A]], align 4 +; CHECK-NEXT: ret i27 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i27, ptr %a + ret i27 %v +} + +define <4 x i8> @load_after_memset_0_vec(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_vec( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load <4 x i8>, ptr [[A]], align 4 +; 
CHECK-NEXT: ret <4 x i8> [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load <4 x i8>, ptr %a + ret <4 x i8> %v +} + define i32 @load_after_memset_1(ptr %a) { ; CHECK-LABEL: @load_after_memset_1( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) @@ -276,6 +309,39 @@ define i32 @load_after_memset_1(ptr %a) { ret i32 %v } +define float @load_after_memset_1_float(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_float( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: ret float [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load float, ptr %a + ret float %v +} + +define i27 @load_after_memset_1_non_byte_sized(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_non_byte_sized( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i27, ptr [[A]], align 4 +; CHECK-NEXT: ret i27 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load i27, ptr %a + ret i27 %v +} + +define <4 x i8> @load_after_memset_1_vec(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_vec( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load <4 x i8>, ptr [[A]], align 4 +; CHECK-NEXT: ret <4 x i8> [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load <4 x i8>, ptr %a + ret <4 x i8> %v +} + define i32 @load_after_memset_unknown(ptr %a, i8 %byte) { ; CHECK-LABEL: @load_after_memset_unknown( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 [[BYTE:%.*]], i64 16, i1 false) @@ -350,4 +416,37 @@ define i256 
@load_after_memset_0_too_small(ptr %a) { ret i256 %v } +define i32 @load_after_memset_0_unknown_length(ptr %a, i64 %len) { +; CHECK-LABEL: @load_after_memset_0_unknown_length( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[A:%.*]], i8 0, i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 %len, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +define i32 @load_after_memset_0_atomic(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_atomic( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load atomic i32, ptr [[A]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load atomic i32, ptr %a seq_cst, align 4 + ret i32 %v +} + +define @load_after_memset_0_scalable(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_scalable( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load , ptr [[A]], align 4 +; CHECK-NEXT: ret [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load , ptr %a + ret %v +} + declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) From 0fb763e7d0a4b8c9f5978675e7556ae50716d695 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Mon, 17 Oct 2022 17:16:24 +0000 Subject: [PATCH 124/516] [flang] Add -f[no-]honor-infinities and -menable-no-infs Only add the option processing and store the result. No attributes are added to FIR yet. This patch follows Clang in forwarding -fno-honor-infinities as -menable-no-infs. 
Reviewed By: kiranchandramohan awarzynski vzakhari Differential Revision: https://reviews.llvm.org/D137072 --- clang/include/clang/Driver/Options.td | 7 +++--- clang/lib/Driver/ToolChains/Flang.cpp | 23 ++++++++++++++++++++ flang/include/flang/Frontend/LangOptions.def | 3 +++ flang/lib/Frontend/CompilerInvocation.cpp | 6 +++++ flang/test/Driver/driver-help.f90 | 1 + flang/test/Driver/flang_fp_opts.f90 | 3 ++- flang/test/Driver/frontend-forwarding.f90 | 2 ++ 7 files changed, 41 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3e15c55ed52ea..65cd6e85da4e1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5444,9 +5444,6 @@ def mframe_pointer_EQ : Joined<["-"], "mframe-pointer=">, HelpText<"Specify which frame pointers to retain.">, Values<"all,non-leaf,none">, NormalizedValuesScope<"CodeGenOptions::FramePointerKind">, NormalizedValues<["All", "NonLeaf", "None"]>, MarshallingInfoEnum, "None">; -def menable_no_infinities : Flag<["-"], "menable-no-infs">, - HelpText<"Allow optimization to assume there are no infinities.">, - MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; def menable_no_nans : Flag<["-"], "menable-no-nans">, HelpText<"Allow optimization to assume there are no NaNs.">, MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; @@ -6060,6 +6057,10 @@ def split_dwarf_output : Separate<["-"], "split-dwarf-output">, let Flags = [CC1Option, FC1Option, NoDriverOption] in { +def menable_no_infinities : Flag<["-"], "menable-no-infs">, + HelpText<"Allow optimization to assume there are no infinities.">, + MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; + def pic_level : Separate<["-"], "pic-level">, HelpText<"Value for __PIC__">, MarshallingInfoInt>; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 6c6895da61299..14547b6f409aa 100644 --- 
a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -83,6 +83,7 @@ void Flang::AddPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { static void addFloatingPointOptions(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) { StringRef FPContract; + bool HonorINFs = true; if (const Arg *A = Args.getLastArg(options::OPT_ffp_contract)) { const StringRef Val = A->getValue(); @@ -101,8 +102,30 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, << A->getOption().getName() << Val; } + for (const Arg *A : Args) { + auto optId = A->getOption().getID(); + switch (optId) { + // if this isn't an FP option, skip the claim below + default: + continue; + + case options::OPT_fhonor_infinities: + HonorINFs = true; + break; + case options::OPT_fno_honor_infinities: + HonorINFs = false; + break; + } + + // If we handled this option claim it + A->claim(); + } + if (!FPContract.empty()) CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract)); + + if (!HonorINFs) + CmdArgs.push_back("-menable-no-infs"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index c4d0ec5329b2e..96e9ea63f1964 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -21,5 +21,8 @@ LANGOPT(Name, Bits, Default) ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Off) ///< FP Contract Mode (off/fast) +/// Permit floating point optimization without regard to infinities +LANGOPT(NoHonorInfs, 1, false) + #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 3a64086be33d3..10c73169d0d02 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -691,6 +691,12 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, 
opts.setFPContractMode(fpContractMode); } + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_menable_no_infinities)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.NoHonorInfs = true; + } + return true; } diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 3ab509c7129e9..068985bc6d56e 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -130,6 +130,7 @@ ! HELP-FC1-NEXT: -init-only Only execute frontend initialization ! HELP-FC1-NEXT: -I Add directory to the end of the list of include search paths ! HELP-FC1-NEXT: -load Load the named plugin (dynamic shared object) +! HELP-FC1-NEXT: -menable-no-infs Allow optimization to assume there are no infinities. ! HELP-FC1-NEXT: -mllvm Additional arguments to forward to LLVM's option processing ! HELP-FC1-NEXT: -mmlir Additional arguments to forward to MLIR's option processing ! HELP-FC1-NEXT: -module-dir Put MODULE files in diff --git a/flang/test/Driver/flang_fp_opts.f90 b/flang/test/Driver/flang_fp_opts.f90 index 34987f4b0c438..272ef8495b957 100644 --- a/flang/test/Driver/flang_fp_opts.f90 +++ b/flang/test/Driver/flang_fp_opts.f90 @@ -1,4 +1,5 @@ ! Test for handling of floating point options within the frontend driver -! RUN: %flang_fc1 -ffp-contract=fast %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -ffp-contract=fast -menable-no-infs %s 2>&1 | FileCheck %s ! CHECK: ffp-contract= is not currently implemented +! CHECK: menable-no-infs is not currently implemented diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index b956940fd7d29..0c3fd6c48ee20 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -9,6 +9,7 @@ ! RUN: -flarge-sizes \ ! RUN: -fconvert=little-endian \ ! RUN: -ffp-contract=fast \ +! RUN: -fno-honor-infinities \ ! RUN: -mllvm -print-before-all\ ! RUN: -P \ ! 
RUN: | FileCheck %s @@ -20,5 +21,6 @@ ! CHECK: "-fdefault-real-8" ! CHECK: "-flarge-sizes" ! CHECK: "-ffp-contract=fast" +! CHECK: "-menable-no-infs" ! CHECK: "-fconvert=little-endian" ! CHECK: "-mllvm" "-print-before-all" From 5144133f6fd50d6067c808b83af90437995e441d Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Thu, 27 Oct 2022 14:38:00 +0200 Subject: [PATCH 125/516] [AMDGPU] Fix GCNDownwardRPTracker::advanceBeforeNext at the end of MBB The problem with GCNDownwardRPTracker::advanceBeforeNext is that it doesn't allow to get register pressure after the last instruction in a MBB. However when we track RP through the boundary of a MBB we need the state that is after the last instruction of the MBB and before the first instruction of the successor MBB. Currently we stop traking RP in the state 'at' the last instruction of the MBB which is incorrect. This patch fixes 27 lit tests with EXPENSIVE_CHECKS enabled. Reviewed By: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D136927 --- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 18 +++++++++++------- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 4 ++-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 1 - 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 2f38f7f65f80b..f9bed9a76c6fb 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -325,12 +325,14 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI, bool GCNDownwardRPTracker::advanceBeforeNext() { assert(MRI && "call reset first"); + if (!LastTrackedMI) + return NextMI == MBBEnd; - NextMI = skipDebugInstructionsForward(NextMI, MBBEnd); - if (NextMI == MBBEnd) - return false; + assert(NextMI == MBBEnd || !NextMI->isDebugInstr()); - SlotIndex SI = LIS.getInstructionIndex(*NextMI).getBaseIndex(); + SlotIndex SI = NextMI == MBBEnd + ? 
LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot() + : LIS.getInstructionIndex(*NextMI).getBaseIndex(); assert(SI.isValid()); // Remove dead registers or mask bits. @@ -355,7 +357,9 @@ bool GCNDownwardRPTracker::advanceBeforeNext() { MaxPressure = max(MaxPressure, CurPressure); - return true; + LastTrackedMI = nullptr; + + return NextMI == MBBEnd; } void GCNDownwardRPTracker::advanceToNext() { @@ -379,9 +383,9 @@ void GCNDownwardRPTracker::advanceToNext() { } bool GCNDownwardRPTracker::advance() { - // If we have just called reset live set is actual. - if ((NextMI == MBBEnd) || (LastTrackedMI && !advanceBeforeNext())) + if (NextMI == MBBEnd) return false; + advanceBeforeNext(); advanceToNext(); return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index b6ad960a8a65f..72e18acc1b8e4 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -172,8 +172,8 @@ class GCNDownwardRPTracker : public GCNRPTracker { // Returns false if block is empty except debug values. bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); - // Move to the state right before the next MI. Returns false if reached - // end of the block. + // Move to the state right before the next MI or after the end of MBB. + // Returns false if reached end of the block. bool advanceBeforeNext(); // Move to the state at the MI, advanceBeforeNext has to be called first. 
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 1577c1761aadd..25fcf422bfbe7 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -538,7 +538,6 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx, RPTracker.advanceToNext(); RPTracker.advance(MBB->end()); } - RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); RPTracker.advanceBeforeNext(); MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); } From e53c4c6d8617145c4dd8d428bf47544ba4110eb5 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Thu, 3 Nov 2022 09:50:31 +0000 Subject: [PATCH 126/516] [Assignment Tracking][3/*] Add DIAssignID metadata boilerplate The Assignment Tracking debug-info feature is outlined in this RFC: https://discourse.llvm.org/t/ rfc-assignment-tracking-a-better-way-of-specifying-variable-locations-in-ir Add the DIAssignID metadata attachment boilerplate. Includes a textual-bitcode roundtrip test and tests that the verifier and parser catch badly formed IR. This piece of metadata links together stores (used as an attachment) and the yet-to-be-added llvm.dbg.assign debug intrinsic (used as an operand). 
Reviewed By: jmorse Differential Revision: https://reviews.llvm.org/D132222 --- llvm/include/llvm-c/DebugInfo.h | 3 +- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 3 +- llvm/include/llvm/IR/DebugInfoMetadata.h | 36 +++++++++++++++++++ llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/include/llvm/IR/Metadata.def | 1 + llvm/lib/AsmParser/LLParser.cpp | 18 ++++++++++ llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 13 +++++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 11 ++++++ llvm/lib/IR/AsmWriter.cpp | 6 ++++ llvm/lib/IR/DebugInfo.cpp | 8 +++-- llvm/lib/IR/DebugInfoMetadata.cpp | 7 ++++ llvm/lib/IR/Verifier.cpp | 17 +++++++++ .../parse-and-verify/distinct.ll | 9 +++++ .../parse-and-verify/instruction-type.ll | 36 +++++++++++++++++++ .../parse-and-verify/operands.ll | 9 +++++ .../parse-and-verify/roundtrip.ll | 33 +++++++++++++++++ 16 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 8554a01998736..ef6a147eb2a52 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -169,7 +169,8 @@ enum { LLVMDICommonBlockMetadataKind, LLVMDIStringTypeMetadataKind, LLVMDIGenericSubrangeMetadataKind, - LLVMDIArgListMetadataKind + LLVMDIArgListMetadataKind, + LLVMDIAssignIDMetadataKind, }; typedef unsigned LLVMMetadataKind; diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index ee5669c6c6aa8..74a51d5ce6907 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -349,7 +349,8 @@ enum 
MetadataCodes { // info. METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...] METADATA_GENERIC_SUBRANGE = 45, // [distinct, count, lo, up, stride] - METADATA_ARG_LIST = 46 // [n x [type num, value num]] + METADATA_ARG_LIST = 46, // [n x [type num, value num]] + METADATA_ASSIGN_ID = 47, // [distinct, ...] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 5b20bf3ade99a..f57691f6f9fc6 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -215,6 +215,7 @@ class DINode : public MDNode { case DIImportedEntityKind: case DIModuleKind: case DIGenericSubrangeKind: + case DIAssignIDKind: return true; } } @@ -295,6 +296,41 @@ class GenericDINode : public DINode { } }; +/// Assignment ID. +/// Used to link stores (as an attachment) and dbg.assigns (as an operand). +/// DIAssignID metadata is never uniqued as we compare instances using +/// referential equality (the instance/address is the ID). +class DIAssignID : public MDNode { + friend class LLVMContextImpl; + friend class MDNode; + + DIAssignID(LLVMContext &C, StorageType Storage) + : MDNode(C, DIAssignIDKind, Storage, None) {} + + ~DIAssignID() { dropAllReferences(); } + + static DIAssignID *getImpl(LLVMContext &Context, StorageType Storage, + bool ShouldCreate = true); + + TempDIAssignID cloneImpl() const { return getTemporary(getContext()); } + +public: + // This node has no operands to replace. + void replaceOperandWith(unsigned I, Metadata *New) = delete; + + static DIAssignID *getDistinct(LLVMContext &Context) { + return getImpl(Context, Distinct); + } + static TempDIAssignID getTemporary(LLVMContext &Context) { + return TempDIAssignID(getImpl(Context, Temporary)); + } + // NOTE: Do not define get(LLVMContext&) - see class comment. 
+ + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIAssignIDKind; + } +}; + /// Array subrange. /// /// TODO: Merge into node for DW_TAG_array_type, which should have a custom diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 3d986325c5d33..8723bf2a0680c 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -49,3 +49,4 @@ LLVM_FIXED_MD_KIND(MD_memprof, "memprof", 34) LLVM_FIXED_MD_KIND(MD_callsite, "callsite", 35) LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36) LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) +LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index bbf349e6b508c..36c34c1d2347c 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -110,6 +110,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILabel) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_LEAF(DIAssignID) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 0fda0559b5b41..43e47aa33c863 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4646,6 +4646,24 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) { return false; } +/// parseDIAssignID: +/// ::= distinct !DIAssignID() +bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) { + if (!IsDistinct) + return Lex.Error("missing 'distinct', required for !DIAssignID()"); + + Lex.Lex(); + + // Now eat the parens. 
+ if (parseToken(lltok::lparen, "expected '(' here")) + return true; + if (parseToken(lltok::rparen, "expected ')' here")) + return true; + + Result = DIAssignID::getDistinct(Context); + return false; +} + /// parseGenericDINode: /// ::= !GenericDINode(tag: 15, header: "...", operands: {...}) bool LLParser::parseGenericDINode(MDNode *&Result, bool IsDistinct) { diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 02d76f61695af..1ac1502e8aefb 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -856,6 +856,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { case bitc::METADATA_TEMPLATE_VALUE: case bitc::METADATA_GLOBAL_VAR: case bitc::METADATA_LOCAL_VAR: + case bitc::METADATA_ASSIGN_ID: case bitc::METADATA_LABEL: case bitc::METADATA_EXPRESSION: case bitc::METADATA_OBJC_PROPERTY: @@ -1964,6 +1965,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } + case bitc::METADATA_ASSIGN_ID: { + if (Record.size() != 1) + return error("Invalid DIAssignID record."); + + IsDistinct = Record[0] & 1; + if (!IsDistinct) + return error("Invalid DIAssignID record. Must be distinct"); + + MetadataList.assignValue(DIAssignID::getDistinct(Context), NextMetadataNo); + NextMetadataNo++; + break; + } case bitc::METADATA_LOCAL_VAR: { // 10th field is for the obseleted 'inlinedAt:' field. 
if (Record.size() < 8 || Record.size() > 10) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1ac4413f158eb..bed3ebad9874e 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -340,6 +340,8 @@ class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { unsigned Abbrev); void writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev); + void writeDIAssignID(const DIAssignID *N, SmallVectorImpl &Record, + unsigned Abbrev); void writeDITemplateTypeParameter(const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev); @@ -1955,6 +1957,15 @@ void ModuleBitcodeWriter::writeDIModule(const DIModule *N, Record.clear(); } +void ModuleBitcodeWriter::writeDIAssignID(const DIAssignID *N, + SmallVectorImpl &Record, + unsigned Abbrev) { + // There are no arguments for this metadata type. + Record.push_back(N->isDistinct()); + Stream.EmitRecord(bitc::METADATA_ASSIGN_ID, Record, Abbrev); + Record.clear(); +} + void ModuleBitcodeWriter::writeDITemplateTypeParameter( const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev) { diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 21e662bed6b25..d49b8710bc9a4 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1865,6 +1865,12 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL, Out << ")"; } +static void writeDIAssignID(raw_ostream &Out, const DIAssignID *DL, + AsmWriterContext &WriterCtx) { + Out << "!DIAssignID()"; + MDFieldPrinter Printer(Out, WriterCtx); +} + static void writeDISubrange(raw_ostream &Out, const DISubrange *N, AsmWriterContext &WriterCtx) { Out << "!DISubrange("; diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 8f6d58cb90b90..d30fca63067c0 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -469,9 +469,13 @@ bool llvm::stripDebugInfo(Function &F) { if 
(NewLoopID != LoopID) I.setMetadata(LLVMContext::MD_loop, NewLoopID); } - // Strip heapallocsite attachments, they point into the DIType system. - if (I.hasMetadataOtherThanDebugLoc()) + // Strip other attachments that are or use debug info. + if (I.hasMetadataOtherThanDebugLoc()) { + // Heapallocsites point into the DIType system. I.setMetadata("heapallocsite", nullptr); + // DIAssignID are debug info metadata primitives. + I.setMetadata(LLVMContext::MD_DIAssignID, nullptr); + } } } return Changed; diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 9b4f92a63c5e2..2359e56c08684 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1253,6 +1253,13 @@ bool DIExpression::startsWithDeref() const { return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref; } +DIAssignID *DIAssignID::getImpl(LLVMContext &Context, StorageType Storage, + bool ShouldCreate) { + // Uniqued DIAssignID are not supported as the instance address *is* the ID. 
+ assert(Storage != StorageType::Uniqued && "uniqued DIAssignID unsupported"); + return new (0u, Storage) DIAssignID(Context, Storage); +} + unsigned DIExpression::ExprOperand::getSize() const { uint64_t Op = getOp(); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 0614f206981a1..c9b2d6aa5abf2 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -471,6 +471,7 @@ class Verifier : public InstVisitor, VerifierSupport { void visitCallStackMetadata(MDNode *MD); void visitMemProfMetadata(Instruction &I, MDNode *MD); void visitCallsiteMetadata(Instruction &I, MDNode *MD); + void visitDIAssignIDMetadata(Instruction &I, MDNode *MD); void visitAnnotationMetadata(MDNode *Annotation); void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); @@ -1483,6 +1484,11 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) { CheckDI(!isa(Ty), "invalid type", &N, N.getType()); } +void Verifier::visitDIAssignID(const DIAssignID &N) { + CheckDI(!N.getNumOperands(), "DIAssignID has no arguments", &N); + CheckDI(N.isDistinct(), "DIAssignID must be distinct", &N); +} + void Verifier::visitDILabel(const DILabel &N) { if (auto *S = N.getRawScope()) CheckDI(isa(S), "invalid scope", &N, S); @@ -4549,6 +4555,14 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { } } +void Verifier::visitDIAssignIDMetadata(Instruction &I, MDNode *MD) { + assert(I.hasMetadata(LLVMContext::MD_DIAssignID)); + bool ExpectedInstTy = + isa(I) || isa(I) || isa(I); + CheckDI(ExpectedInstTy, "!DIAssignID attached to unexpected instruction kind", + I, MD); +} + void Verifier::visitCallStackMetadata(MDNode *MD) { // Call stack metadata should consist of a list of at least 1 constant int // (representing a hash of the location). 
@@ -4850,6 +4864,9 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite)) visitCallsiteMetadata(I, MD); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_DIAssignID)) + visitDIAssignIDMetadata(I, MD); + if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll new file mode 100644 index 0000000000000..2cc5452fe7d2a --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. + +; CHECK: error: missing 'distinct', required for !DIAssignID() + +!1 = !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll new file mode 100644 index 0000000000000..d0f447ee200b6 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll @@ -0,0 +1,36 @@ +; RUN: opt -S %s -verify -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; NOTE: Expect opt to return zero because the badly formed debug info +;; is going to be stripped. + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. + +;; Check verifier output. +; CHECK: !DIAssignID attached to unexpected instruction kind + +;; Check DIAssignID is stripped from IR. 
+; CHECK: define dso_local void @fun() { +; CHECK-NOT: DIAssignID + +define dso_local void @fun() !dbg !7 { +entry: + ret void, !DIAssignID !14 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!14 = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll new file mode 100644 index 0000000000000..79adcb9ce2d12 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. 
+ +; CHECK: error: expected ')' here + +!1 = distinct !DIAssignID(0) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll new file mode 100644 index 0000000000000..1ddb95b79b0f0 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll @@ -0,0 +1,33 @@ +; RUN: opt %s -verify -experimental-assignment-tracking \ +; RUN: | opt -verify -S -experimental-assignment-tracking \ +; RUN: | FileCheck %s + +;; Roundtrip test (text -> bitcode -> text) for DIAssignID attachments. + +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID:[0-9]+]] +; CHECK-DAG: ![[ID]] = distinct !DIAssignID() + +define dso_local void @fun() !dbg !7 { +entry: + %local = alloca i32, align 4, !DIAssignID !14 + ret void, !dbg !13 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 2, type: !11) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 3, column: 1, scope: !7) +!14 = distinct !DIAssignID() From cdbe296853b1b3fc6415236f05770360e23f0d39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= Date: Thu, 13 Oct 2022 23:19:16 +0200 Subject: [PATCH 127/516] 
[clang-format] Fix lambda formatting in conditional Without the patch UnwrappedLineFormatter::analyzeSolutionSpace just ran out of possible formattings and would put everything just on one line. The problem was that the line break was forbidden, but putting the conditional colon on the same line is also forbidden. Differential Revision: https://reviews.llvm.org/D135918 --- clang/lib/Format/ContinuationIndenter.cpp | 9 +++++ clang/unittests/Format/FormatTest.cpp | 48 +++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index f1b5d184963ce..3fa3e6bcbb569 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -331,6 +331,15 @@ bool ContinuationIndenter::canBreak(const LineState &State) { if (Previous.is(tok::l_square) && Previous.is(TT_ObjCMethodExpr)) return false; + if (Current.is(TT_ConditionalExpr) && Previous.is(tok::r_paren) && + Previous.MatchingParen && Previous.MatchingParen->Previous && + Previous.MatchingParen->Previous->MatchingParen && + Previous.MatchingParen->Previous->MatchingParen->is(TT_LambdaLBrace)) { + // We have a lambda within a conditional expression, allow breaking here. + assert(Previous.MatchingParen->Previous->is(tok::r_brace)); + return true; + } + return !CurrentState.NoLineBreak; } diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 82e91b3222715..acf172ea98d9d 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -25549,6 +25549,54 @@ TEST_F(FormatTest, ShortTemplatedArgumentLists) { verifyFormat("template struct Foo {};", Style); } +TEST_F(FormatTest, MultilineLambdaInConditional) { + auto Style = getLLVMStyleWithColumns(70); + verifyFormat("auto aLengthyIdentifier = oneExpressionSoThatWeBreak ?
[]() {\n" + " ;\n" + " return 5;\n" + "}()\n" + " : 2;", + Style); + verifyFormat( + "auto aLengthyIdentifier = oneExpressionSoThatWeBreak ? 2 : []() {\n" + " ;\n" + " return 5;\n" + "}();", + Style); + + Style = getLLVMStyleWithColumns(60); + verifyFormat("auto aLengthyIdentifier = oneExpressionSoThatWeBreak\n" + " ? []() {\n" + " ;\n" + " return 5;\n" + " }()\n" + " : 2;", + Style); + verifyFormat("auto aLengthyIdentifier =\n" + " oneExpressionSoThatWeBreak ? 2 : []() {\n" + " ;\n" + " return 5;\n" + " }();", + Style); + + Style = getLLVMStyleWithColumns(40); + verifyFormat("auto aLengthyIdentifier =\n" + " oneExpressionSoThatWeBreak ? []() {\n" + " ;\n" + " return 5;\n" + " }()\n" + " : 2;", + Style); + verifyFormat("auto aLengthyIdentifier =\n" + " oneExpressionSoThatWeBreak\n" + " ? 2\n" + " : []() {\n" + " ;\n" + " return 5;\n" + " };", + Style); +} + TEST_F(FormatTest, AlignAfterOpenBracketBlockIndent) { auto Style = getLLVMStyle(); From f97639ce13754e78e26f8d7f564830ddfe4f727c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Sch=C3=A4pers?= Date: Mon, 24 Oct 2022 21:21:31 +0200 Subject: [PATCH 128/516] [clang-format] Don't misannotate in CTor init list They were annotated with TrailingAnnotation, which they are not. And that resulted in some quirky formatting in some cases. 
Differential Revision: https://reviews.llvm.org/D136635 --- clang/lib/Format/TokenAnnotator.cpp | 4 ++- clang/unittests/Format/TokenAnnotatorTest.cpp | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 4901c5ce71066..3d76cc171b0dc 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1989,7 +1989,9 @@ class AnnotatingParser { } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept, tok::kw_requires) && Current.Previous && - !Current.Previous->isOneOf(tok::equal, tok::at) && + !Current.Previous->isOneOf(tok::equal, tok::at, + TT_CtorInitializerComma, + TT_CtorInitializerColon) && Line.MightBeFunctionDecl && Contexts.size() == 1) { // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index b4e27d35bc369..fa95f6845f077 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1124,6 +1124,35 @@ TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) { EXPECT_TOKEN(Tokens[9], tok::colon, TT_GotoLabelColon); } +TEST_F(TokenAnnotatorTest, UnderstandConstructors) { + auto Tokens = annotate("Class::Class() : BaseClass(), Member() {}"); + + // The TT_Unknown is clearly not binding for the future, please adapt if those + // tokens get annotated. 
+ ASSERT_EQ(Tokens.size(), 16u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::colon, TT_CtorInitializerColon); + EXPECT_TOKEN(Tokens[6], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[9], tok::comma, TT_CtorInitializerComma); + EXPECT_TOKEN(Tokens[10], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[12], tok::r_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace); + + Tokens = annotate("Class::Class() : BaseClass{}, Member{} {}"); + ASSERT_EQ(Tokens.size(), 16u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::colon, TT_CtorInitializerColon); + EXPECT_TOKEN(Tokens[6], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::r_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[9], tok::comma, TT_CtorInitializerComma); + EXPECT_TOKEN(Tokens[10], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[11], tok::l_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[12], tok::r_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace); +} + } // namespace } // namespace format } // namespace clang From 691774d4030d9b7f2941946d9a78acce92f87310 Mon Sep 17 00:00:00 2001 From: Yusuke Kadowaki Date: Thu, 3 Nov 2022 13:10:48 +0100 Subject: [PATCH 129/516] [clang-format][NFC] Fix document of AlignTrailingComments The documentation of the patch https://reviews.llvm.org/D132131 looks disorganized on the website https://clang.llvm.org/docs/ClangFormatStyleOptions.html. This patch tries to fix that. 
Differential Revision: https://reviews.llvm.org/D137075 --- clang/docs/ClangFormatStyleOptions.rst | 10 ++++++---- clang/include/clang/Format/Format.h | 11 +++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 0416190a2fd52..44f05cf28270b 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -865,7 +865,7 @@ the configuration (without a prefix: ``Auto``). Alignment options * ``TrailingCommentsAlignmentKinds Kind`` - Specifies the way to align trailing comments + Specifies the way to align trailing comments. Possible values: @@ -903,8 +903,9 @@ the configuration (without a prefix: ``Auto``). int abcd; // comment - * ``unsigned OverEmptyLines`` How many empty lines to apply alignment - With ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 2, + * ``unsigned OverEmptyLines`` How many empty lines to apply alignment. + When both ``MaxEmptyLinesToKeep`` and ``OverEmptyLines`` are set to 2, + it formats like below. .. code-block:: c++ @@ -915,7 +916,8 @@ the configuration (without a prefix: ``Auto``). int abcdef; // aligned - And with ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 1, + When ``MaxEmptyLinesToKeep`` is set to 2 and ``OverEmptyLines`` is set + to 1, it formats like below. .. code-block:: c++ diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 6981dc158d241..3cadb6304dced 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -402,10 +402,11 @@ struct FormatStyle { /// Alignment options struct TrailingCommentsAlignmentStyle { - /// Specifies the way to align trailing comments + /// Specifies the way to align trailing comments. 
TrailingCommentsAlignmentKinds Kind; - /// How many empty lines to apply alignment - /// With ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 2, + /// How many empty lines to apply alignment. + /// When both ``MaxEmptyLinesToKeep`` and ``OverEmptyLines`` are set to 2, + /// it formats like below. /// \code /// int a; // all these /// @@ -414,7 +415,9 @@ struct FormatStyle { /// /// int abcdef; // aligned /// \endcode - /// And with ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 1, + /// + /// When ``MaxEmptyLinesToKeep`` is set to 2 and ``OverEmptyLines`` is set + /// to 1, it formats like below. /// \code /// int a; // these are /// From 81181089c6d835f2f506bb125bb81ab5d397e3d6 Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Fri, 2 Sep 2022 10:18:12 +0100 Subject: [PATCH 130/516] [Flang][Driver]Add datalayout before doing LLVM-IR transformation The earlier available datalayout allows MLIR to LLVM-IR transformation to use the datalayout for decisions, such as comparing sizes for different types of integers.
This should solve https://github.com/llvm/llvm-project/issues/57230 Reviewed By: awarzynski, vzakhari Differential Revision: https://reviews.llvm.org/D133568 --- .../include/flang/Frontend/FrontendActions.h | 2 +- .../include/flang/Optimizer/Support/InitFIR.h | 2 +- flang/lib/Frontend/CMakeLists.txt | 1 + flang/lib/Frontend/FrontendActions.cpp | 47 +++++++++++++++---- flang/test/Driver/emit-llvm.f90 | 1 + flang/test/Driver/emit-mlir.f90 | 2 + flang/test/Driver/pic-flags.f90 | 2 +- .../unittests/Frontend/FrontendActionTest.cpp | 5 ++ 8 files changed, 49 insertions(+), 13 deletions(-) diff --git a/flang/include/flang/Frontend/FrontendActions.h b/flang/include/flang/Frontend/FrontendActions.h index 975aaa0b9da27..3f50d320c1f3b 100644 --- a/flang/include/flang/Frontend/FrontendActions.h +++ b/flang/include/flang/Frontend/FrontendActions.h @@ -199,7 +199,7 @@ class CodeGenAction : public FrontendAction { void executeAction() override; /// Runs prescan, parsing, sema and lowers to MLIR. bool beginSourceFileAction() override; - /// Sets up LLVM's TargetMachine, configures llvmModule accordingly. + /// Sets up LLVM's TargetMachine. void setUpTargetMachine(); /// Runs the optimization (aka middle-end) pipeline on the LLVM module /// associated with this action. diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h index bbc50dcec2e73..12bd80e7abd4f 100644 --- a/flang/include/flang/Optimizer/Support/InitFIR.h +++ b/flang/include/flang/Optimizer/Support/InitFIR.h @@ -32,7 +32,7 @@ namespace fir::support { mlir::scf::SCFDialect, mlir::arith::ArithDialect, \ mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \ mlir::vector::VectorDialect, mlir::math::MathDialect, \ - mlir::complex::ComplexDialect + mlir::complex::ComplexDialect, mlir::DLTIDialect // The definitive list of dialects used by flang. 
#define FLANG_DIALECT_LIST \ diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index 4abca70acaba0..fac5f2c1a1f87 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -36,6 +36,7 @@ add_flang_library(flangFrontend MLIRTransforms MLIRLLVMToLLVMIRTranslation MLIRSCFToControlFlow + MLIRTargetLLVMIRImport ${dialect_libs} LINK_COMPONENTS diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index dfd2089cb9f93..9042332822c8c 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -34,6 +34,8 @@ #include "mlir/IR/Dialect.h" #include "mlir/Parser/Parser.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Target/LLVMIR/Import.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticFrontend.h" @@ -79,6 +81,16 @@ bool PrescanAndSemaDebugAction::beginSourceFileAction() { (generateRtTypeTables() || true); } +static void setMLIRDataLayout(mlir::ModuleOp &mlirModule, + const llvm::DataLayout &dl) { + mlir::MLIRContext *context = mlirModule.getContext(); + mlirModule->setAttr( + mlir::LLVM::LLVMDialect::getDataLayoutAttrName(), + mlir::StringAttr::get(context, dl.getStringRepresentation())); + mlir::DataLayoutSpecInterface dlSpec = mlir::translateDataLayout(dl, context); + mlirModule->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec); +} + bool CodeGenAction::beginSourceFileAction() { llvmCtx = std::make_unique(); CompilerInstance &ci = this->getInstance(); @@ -123,6 +135,9 @@ bool CodeGenAction::beginSourceFileAction() { } mlirModule = std::make_unique(module.release()); + setUpTargetMachine(); + const llvm::DataLayout &dl = tm->createDataLayout(); + setMLIRDataLayout(*mlirModule, dl); return true; } @@ -152,10 +167,15 @@ bool CodeGenAction::beginSourceFileAction() { kindMap, ci.getInvocation().getLoweringOpts(), 
ci.getInvocation().getFrontendOpts().envDefaults); + // Fetch module from lb, so we can set + mlirModule = std::make_unique(lb.getModule()); + setUpTargetMachine(); + const llvm::DataLayout &dl = tm->createDataLayout(); + setMLIRDataLayout(*mlirModule, dl); + // Create a parse tree and lower it to FIR Fortran::parser::Program &parseTree{*ci.getParsing().parseTree()}; lb.lower(parseTree, ci.getInvocation().getSemanticsContext()); - mlirModule = std::make_unique(lb.getModule()); // run the default passes. mlir::PassManager pm(mlirCtx.get(), mlir::OpPassManager::Nesting::Implicit); @@ -565,13 +585,7 @@ getCGOptLevel(const Fortran::frontend::CodeGenOptions &opts) { void CodeGenAction::setUpTargetMachine() { CompilerInstance &ci = this->getInstance(); - // Set the triple based on the CompilerInvocation set-up const std::string &theTriple = ci.getInvocation().getTargetOpts().triple; - if (llvmModule->getTargetTriple() != theTriple) { - ci.getDiagnostics().Report(clang::diag::warn_fe_override_module) - << theTriple; - llvmModule->setTargetTriple(theTriple); - } // Create `Target` std::string error; @@ -735,6 +749,22 @@ void CodeGenAction::executeAction() { if (!llvmModule) generateLLVMIR(); + // Set the triple based on the targetmachine (this comes compiler invocation + // and the command-line target option if specified, or the default if not + // given on the command-line). + setUpTargetMachine(); + const std::string &theTriple = tm->getTargetTriple().str(); + + if (llvmModule->getTargetTriple() != theTriple) { + ci.getDiagnostics().Report(clang::diag::warn_fe_override_module) + << theTriple; + } + // Always set the triple and data layout, to make sure they match and are set. + // Note that this overwrites any datalayout stored in the LLVM-IR. This avoids + // an assert for incompatible data layout when the code-generation happens. + llvmModule->setTargetTriple(theTriple); + llvmModule->setDataLayout(tm->createDataLayout()); + // Run LLVM's middle-end (i.e. 
the optimizer). runOptimizationPipeline(*os); @@ -744,9 +774,6 @@ void CodeGenAction::executeAction() { return; } - setUpTargetMachine(); - llvmModule->setDataLayout(tm->createDataLayout()); - if (action == BackendActionTy::Backend_EmitBC) { // This action has effectively been completed in runOptimizationPipeline. return; diff --git a/flang/test/Driver/emit-llvm.f90 b/flang/test/Driver/emit-llvm.f90 index 8e864421529eb..32a5a044f2b08 100644 --- a/flang/test/Driver/emit-llvm.f90 +++ b/flang/test/Driver/emit-llvm.f90 @@ -6,6 +6,7 @@ ! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck %s ! CHECK: ; ModuleID = 'FIRModule' +! CHECK: target datalayout = ! CHECK: define void @_QQmain() ! CHECK-NEXT: ret void ! CHECK-NEXT: } diff --git a/flang/test/Driver/emit-mlir.f90 b/flang/test/Driver/emit-mlir.f90 index 9391195c94339..191ee13396ef9 100644 --- a/flang/test/Driver/emit-mlir.f90 +++ b/flang/test/Driver/emit-mlir.f90 @@ -10,6 +10,8 @@ ! RUN: %flang_fc1 -emit-mlir emit-mlir.f90 && ls emit-mlir.mlir ! CHECK: module attributes { +! CHECK-SAME: dlti.dl_spec = +! CHECK-SAME: llvm.data_layout = ! CHECK-LABEL: func @_QQmain() { ! CHECK-NEXT: return ! CHECK-NEXT: } diff --git a/flang/test/Driver/pic-flags.f90 b/flang/test/Driver/pic-flags.f90 index 2f4842f72cadf..fb6ab701c3820 100644 --- a/flang/test/Driver/pic-flags.f90 +++ b/flang/test/Driver/pic-flags.f90 @@ -1,3 +1,4 @@ +! REQUIRES: aarch64-registered-target && x86-registered-target && arm-registered-target ! RUN: %flang -v -S -emit-llvm -o - %s --target=aarch64-linux-gnu -fno-pie 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-STATIC,CHECK-STATIC-IR ! RUN: %flang -v -S -emit-llvm -o - %s --target=aarch64-linux-gnu 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PIE-LEVEL2,CHECK-PIE-LEVEL2-IR @@ -14,7 +15,6 @@ ! RUN: %flang -v -### -o - %s --target=arm-none-eabi -frwpi 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-RWPI ! 
RUN: %flang -v -### -o - %s --target=arm-none-eabi -fropi -frwpi 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-ROPI-RWPI - ! CHECK: -fc1 diff --git a/flang/unittests/Frontend/FrontendActionTest.cpp b/flang/unittests/Frontend/FrontendActionTest.cpp index 9b289523e761b..f2727656b5eb0 100644 --- a/flang/unittests/Frontend/FrontendActionTest.cpp +++ b/flang/unittests/Frontend/FrontendActionTest.cpp @@ -178,6 +178,11 @@ TEST_F(FrontendActionTest, EmitLLVM) { compInst.getInvocation().getTargetOpts().triple = llvm::Triple::normalize(llvm::sys::getDefaultTargetTriple()); + // Initialise LLVM backend + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmPrinters(); + // Set-up the output stream. We are using output buffer wrapped as an output // stream, as opposed to an actual file (or a file descriptor). llvm::SmallVector outputFileBuffer; From 44d8f80b267de972bdb5a68d5060addc423e637c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 3 Nov 2022 12:20:23 +0000 Subject: [PATCH 131/516] [ConstraintElim] Use ConstantInt::getTrue to create constants (NFC). Use existing ConstantInt::getTrue/getFalse functionality instead of custom getScalarConstOrSplat as suggested by @nikic. 
--- llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index a78bfbb54144b..9c304d1f955c8 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -746,12 +746,6 @@ void State::addInfoFor(BasicBlock &BB) { WorkList.emplace_back(DT.getNode(Br->getSuccessor(1)), CmpI, true); } -static Constant *getScalarConstOrSplat(ConstantInt *C, Type *Ty) { - if (auto *VTy = dyn_cast(Ty)) - return ConstantVector::getSplat(VTy->getElementCount(), C); - return C; -} - static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n"); @@ -784,7 +778,6 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { }); bool Changed = false; - LLVMContext &Ctx = Cmp->getModule()->getContext(); if (CSToUse.isConditionImplied(R.Coefficients)) { if (!DebugCounter::shouldExecute(EliminatedCounter)) return false; @@ -794,7 +787,7 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); Constant *TrueC = - getScalarConstOrSplat(ConstantInt::getTrue(Ctx), Cmp->getType()); + ConstantInt::getTrue(CmpInst::makeCmpResultType(Cmp->getType())); Cmp->replaceUsesWithIf(TrueC, [](Use &U) { // Conditions in an assume trivially simplify to true. Skip uses // in assume calls to not destroy the available information. 
@@ -813,7 +806,7 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); Constant *FalseC = - getScalarConstOrSplat(ConstantInt::getFalse(Ctx), Cmp->getType()); + ConstantInt::getFalse(CmpInst::makeCmpResultType(Cmp->getType())); Cmp->replaceAllUsesWith(FalseC); NumCondsRemoved++; Changed = true; From 742970920b7a7fc2fe1cb6bca6fb04f03ab7d5d9 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Tue, 1 Nov 2022 17:37:06 +0100 Subject: [PATCH 132/516] [Clang] Implement CWG2358 Explicit capture of value Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D137172 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Sema/SemaDeclCXX.cpp | 62 ++++++++++++------- .../dcl.meaning/dcl.fct.default/p7.cpp | 4 +- clang/test/CXX/drs/dr23xx.cpp | 10 +++ .../expr/expr.prim/expr.prim.lambda/p13.cpp | 19 +++++- clang/www/cxx_dr_status.html | 2 +- 6 files changed, 71 insertions(+), 27 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 763f4cece4634..ebf280f4da4a8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -574,6 +574,7 @@ C++ Language Changes in Clang This means Clang will by default accept code using features from C++17 and conforming GNU extensions. Projects incompatible with C++17 can add ``-std=gnu++14`` to their build settings to restore the previous behaviour. +- Implemented DR2358 allowing init captures in lambdas in default arguments. 
C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 80b748cbb3744..ea7997b347959 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -102,24 +102,31 @@ bool CheckDefaultArgumentVisitor::VisitDeclRefExpr(const DeclRefExpr *DRE) { return S.Diag(DRE->getBeginLoc(), diag::err_param_default_argument_references_param) << Param->getDeclName() << DefaultArg->getSourceRange(); - } else if (const auto *VDecl = dyn_cast(Decl)) { - // C++ [dcl.fct.default]p7: - // Local variables shall not be used in default argument - // expressions. - // - // C++17 [dcl.fct.default]p7 (by CWG 2082): - // A local variable shall not appear as a potentially-evaluated - // expression in a default argument. - // - // C++20 [dcl.fct.default]p7 (DR as part of P0588R1, see also CWG 2346): - // Note: A local variable cannot be odr-used (6.3) in a default argument. - // - if (VDecl->isLocalVarDecl() && !DRE->isNonOdrUse()) - return S.Diag(DRE->getBeginLoc(), - diag::err_param_default_argument_references_local) - << VDecl->getDeclName() << DefaultArg->getSourceRange(); + } else { + const VarDecl *VD = nullptr; + if (const auto *BD = dyn_cast(Decl)) + VD = dyn_cast_if_present(BD->getDecomposedDecl()); + else + VD = dyn_cast(Decl); + if (VD) { + // C++ [dcl.fct.default]p7: + // Local variables shall not be used in default argument + // expressions. + // + // C++17 [dcl.fct.default]p7 (by CWG 2082): + // A local variable shall not appear as a potentially-evaluated + // expression in a default argument. + // + // C++20 [dcl.fct.default]p7 (DR as part of P0588R1, see also CWG 2346): + // Note: A local variable cannot be odr-used (6.3) in a default + // argument. 
+ // + if (VD->isLocalVarDecl() && !DRE->isNonOdrUse()) + return S.Diag(DRE->getBeginLoc(), + diag::err_param_default_argument_references_local) + << Decl->getDeclName() << DefaultArg->getSourceRange(); + } } - return false; } @@ -149,13 +156,20 @@ bool CheckDefaultArgumentVisitor::VisitPseudoObjectExpr( } bool CheckDefaultArgumentVisitor::VisitLambdaExpr(const LambdaExpr *Lambda) { - // C++11 [expr.lambda.prim]p13: - // A lambda-expression appearing in a default argument shall not - // implicitly or explicitly capture any entity. - if (Lambda->capture_begin() == Lambda->capture_end()) - return false; - - return S.Diag(Lambda->getBeginLoc(), diag::err_lambda_capture_default_arg); + // [expr.prim.lambda.capture]p9 + // a lambda-expression appearing in a default argument cannot implicitly or + // explicitly capture any local entity. Such a lambda-expression can still + // have an init-capture if any full-expression in its initializer satisfies + // the constraints of an expression appearing in a default argument. + bool Invalid = false; + for (const LambdaCapture &LC : Lambda->captures()) { + if (!Lambda->isInitCapture(&LC)) + return S.Diag(LC.getLocation(), diag::err_lambda_capture_default_arg); + // Init captures are always VarDecl. 
+ auto *D = cast(LC.getCapturedVar()); + Invalid |= Visit(D->getInit()); + } + return Invalid; } } // namespace diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp index 52986faa4e859..97b227222eb09 100644 --- a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp +++ b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp @@ -27,5 +27,7 @@ void h() { struct S { int i; }; auto [x] = S(); - extern void h7(int = x); // FIXME: reject + extern void h7(int = x); + // expected-error@-1 {{default argument references local variable 'x' of enclosing function}} + } diff --git a/clang/test/CXX/drs/dr23xx.cpp b/clang/test/CXX/drs/dr23xx.cpp index 8d6b4a5dc16ea..371ead504bf32 100644 --- a/clang/test/CXX/drs/dr23xx.cpp +++ b/clang/test/CXX/drs/dr23xx.cpp @@ -89,6 +89,16 @@ namespace dr2353 { // dr2353: 9 #pragma clang __debug dump not_use_2 } +#if __cplusplus >= 201402L +namespace dr2358 { // dr2358: 16 + void f2() { + int i = 1; + void g1(int = [xxx=1] { return xxx; }()); // OK + void g2(int = [xxx=i] { return xxx; }()); // expected-error {{default argument references local variable 'i' of enclosing function}} + } +} +#endif + #if __cplusplus >= 201707L // Otherwise, if the qualified-id std::tuple_size names a complete class // type **with a member value**, the expression std::tuple_size::value shall diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp index b55beb7d4ed78..0635a01466afb 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp +++ b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp @@ -1,4 +1,8 @@ -// RUN: %clang_cc1 -std=c++11 %s -Wunused -Wno-unused-lambda-capture -verify +// RUN: %clang_cc1 -std=c++11 %s -Wunused -Wno-unused-lambda-capture -Wno-c++14-extensions -verify +// RUN: %clang_cc1 -std=c++17 %s -Wunused -Wno-unused-lambda-capture -Wno-c++14-extensions -verify + + +const int 
global = 0; void f2() { int i = 1; @@ -7,7 +11,20 @@ void f2() { void g3(int = ([=]{ return i; })()); // expected-error{{lambda expression in default argument cannot capture any entity}} void g4(int = ([=]{ return 0; })()); void g5(int = ([]{ return sizeof i; })()); + void g6(int = ([x=1, y = global, &z = global]{ return x; })()); + void g7(int = ([x=i, &y=i]{ return x; })()); // expected-error 2{{default argument references local variable 'i' of enclosing function}} +} + +#if __cplusplus >= 201703L +int global_array[] = { 1, 2 }; +auto [ga, gb] = global_array; + +void structured_bindings() { + int array[] = { 1, 2 }; + auto [a, b] = array; + void func(int c = [x = a, &xref = a, y = ga, &yref = ga] { return x; }()); // expected-error 2{{default argument references local variable 'a' of enclosing function}} } +#endif namespace lambda_in_default_args { int f(int = [] () -> int { int n; return ++n; } ()); diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 24beb7fc88b65..2affb6cec8e4f 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -13956,7 +13956,7 @@

C++ defect report implementation status

2358 CD5 Explicit capture of value - Unknown + Clang 16 2359 From 3ee1882299f9f7d62c10093fa968f0b16669df0c Mon Sep 17 00:00:00 2001 From: bipmis Date: Thu, 3 Nov 2022 12:28:24 +0000 Subject: [PATCH 133/516] Add another test which breaks the load insert point --- .../AggressiveInstCombine/AArch64/or-load.ll | 52 +++++++++++++++++++ .../AggressiveInstCombine/X86/or-load.ll | 52 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll index 822af07c7fada..24febe4ed8fbc 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -1820,3 +1820,55 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { %o3 = or i32 %o2, %s4 ret i32 %o3 } + +define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert2( +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3 +; LE-NEXT: store i8 0, ptr [[P3]], align 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: store i8 0, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: 
[[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l2 = load i8, ptr %p1 + store i8 0, ptr %p3, align 1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + %l1 = load i8, ptr %p + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll index b043bb75313c5..7eb3fc31ba6d6 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -1940,3 +1940,55 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { %o3 = or i32 %o2, %s4 ret i32 %o3 } + +define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { +; LE-LABEL: @loadCombine_4consecutive_badinsert2( +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3 +; LE-NEXT: store i8 0, ptr [[P3]], align 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: store i8 0, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; 
BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l2 = load i8, ptr %p1 + store i8 0, ptr %p3, align 1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + %l1 = load i8, ptr %p + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} From b8ceb9f4e4bdb69b5c3ea1ccf8505fa244ca0a1e Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 3 Nov 2022 08:27:56 -0400 Subject: [PATCH 134/516] [C++20] Diagnose invalid and reserved module names [module.unit]p1 specifies that module and import are invalid components of a module name, that module names cannot contain reserved identifiers, and that std followed by zero or more digits is reserved. The first issue (module and import pseudo-keywords) requires a diagnostic, the second issue (use of reserved identifiers) does not require a diagnostic. We diagnose both the same -- the code is ill-formed unless the module declaration is in a system "header". This allows STL implementations to use the reserved module names while preventing users from stealing them out from under us. 
Differential Revision: https://reviews.llvm.org/D136953 --- clang/docs/ReleaseNotes.rst | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/lib/Sema/SemaModule.cpp | 57 +++++++++++++++++++ .../CodeGenCXX/cxx20-module-std-subst-1.cppm | 2 + clang/test/Modules/pair-unambiguous-ctor.cppm | 6 ++ clang/test/Modules/reserved-names-1.cpp | 45 +++++++++++++++ clang/test/Modules/reserved-names-2.cpp | 6 ++ clang/test/Modules/reserved-names-3.cpp | 7 +++ clang/test/Modules/reserved-names-4.cpp | 7 +++ .../reserved-names-system-header-1.cpp | 7 +++ .../reserved-names-system-header-2.cpp | 7 +++ 11 files changed, 149 insertions(+) create mode 100644 clang/test/Modules/reserved-names-1.cpp create mode 100644 clang/test/Modules/reserved-names-2.cpp create mode 100644 clang/test/Modules/reserved-names-3.cpp create mode 100644 clang/test/Modules/reserved-names-4.cpp create mode 100644 clang/test/Modules/reserved-names-system-header-1.cpp create mode 100644 clang/test/Modules/reserved-names-system-header-2.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ebf280f4da4a8..73d7aff9b8910 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -377,6 +377,9 @@ Improvements to Clang's diagnostics `Issue 58673 `_. - Better diagnostics when the user has missed `auto` in a declaration. `Issue 49129 `_. +- Clang now diagnoses use of invalid or reserved module names in a module + export declaration. Both are diagnosed as an error, but the diagnostic is + suppressed for use of reserved names in a system header. 
Non-comprehensive list of changes in this release ------------------------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a3da8abde58a6..1b1db765fa7a9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11207,6 +11207,8 @@ def err_private_module_fragment_not_module_interface : Error< "private module fragment in module implementation unit">; def note_not_module_interface_add_export : Note< "add 'export' here if this is intended to be a module interface unit">; +def err_invalid_module_name : Error< + "%0 is %select{an invalid|a reserved}1 name for a module">; def ext_equivalent_internal_linkage_decl_in_modules : ExtWarn< "ambiguous use of internal linkage declaration %0 defined in multiple modules">, diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 4b01f109fc881..19e2c206375bd 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -144,6 +144,37 @@ void Sema::HandleStartOfHeaderUnit() { TU->setLocalOwningModule(Mod); } +/// Tests whether the given identifier is reserved as a module name and +/// diagnoses if it is. Returns true if a diagnostic is emitted and false +/// otherwise. +static bool DiagReservedModuleName(Sema &S, const IdentifierInfo *II, + SourceLocation Loc) { + enum { + Valid = -1, + Invalid = 0, + Reserved = 1, + } Reason = Valid; + + StringRef PartName = II->getName(); + if (II->isStr("module") || II->isStr("import")) + Reason = Invalid; + else if (II->isReserved(S.getLangOpts()) != + ReservedIdentifierStatus::NotReserved) + Reason = Reserved; + + // If the identifier is reserved (not invalid) but is in a system header, + // we do not diagnose (because we expect system headers to use reserved + // identifiers). 
+ if (Reason == Reserved && S.getSourceManager().isInSystemHeader(Loc)) + Reason = Valid; + + if (Reason != Valid) { + S.Diag(Loc, diag::err_invalid_module_name) << II << (int)Reason; + return true; + } + return false; +} + Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, ModuleDeclKind MDK, ModuleIdPath Path, @@ -238,6 +269,32 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, } } + // C++2b [module.unit]p1: ... The identifiers module and import shall not + // appear as identifiers in a module-name or module-partition. All + // module-names either beginning with an identifier consisting of std + // followed by zero or more digits or containing a reserved identifier + // ([lex.name]) are reserved and shall not be specified in a + // module-declaration; no diagnostic is required. + + // Test the first part of the path to see if it's std[0-9]+ but allow the + // name in a system header. + StringRef FirstComponentName = Path[0].first->getName(); + if (!getSourceManager().isInSystemHeader(Path[0].second) && + (FirstComponentName == "std" || + (FirstComponentName.startswith("std") && + llvm::all_of(FirstComponentName.drop_front(3), &llvm::isDigit)))) { + Diag(Path[0].second, diag::err_invalid_module_name) + << Path[0].first << /*reserved*/ 1; + return nullptr; + } + + // Then test all of the components in the path to see if any of them are + // using another kind of reserved or invalid identifier. + for (auto Part : Path) { + if (DiagReservedModuleName(*this, Part.first, Part.second)) + return nullptr; + } + // Flatten the dots in a module name. Unlike Clang's hierarchical module map // modules, the dots here are just another character that can appear in a // module name. 
diff --git a/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm b/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm index 7d4992a2adce8..99fb2327b2d56 100644 --- a/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm +++ b/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm @@ -6,7 +6,9 @@ class Pooh; class Piglet; # 8 "" 2 +# 8 "" 1 3 export module std; // might happen, you can't say it won't! +# 9 "" 2 3 namespace std { export template class allocator { diff --git a/clang/test/Modules/pair-unambiguous-ctor.cppm b/clang/test/Modules/pair-unambiguous-ctor.cppm index 8022f34f3aafa..eb242244260cb 100644 --- a/clang/test/Modules/pair-unambiguous-ctor.cppm +++ b/clang/test/Modules/pair-unambiguous-ctor.cppm @@ -14,7 +14,9 @@ // expected-no-diagnostics module; #include "config.h" +# 3 "pair-unambiguous-ctor.cppm" 1 3 export module std:M; +# 3 "pair-unambiguous-ctor.cppm" 2 3 import :string; import :algorithm; @@ -25,15 +27,19 @@ auto check() { //--- string.cppm module; #include "string.h" +# 28 "pair-unambiguous-ctor.cppm" 1 3 export module std:string; export namespace std { using std::string; } +# 28 "pair-unambiguous-ctor.cppm" 2 3 //--- algorithm.cppm module; #include "algorithm.h" +# 38 "pair-unambiguous-ctor.cppm" 1 3 export module std:algorithm; +# 38 "pair-unambiguous-ctor.cppm" 2 3 //--- pair.h namespace std __attribute__ ((__visibility__ ("default"))) diff --git a/clang/test/Modules/reserved-names-1.cpp b/clang/test/Modules/reserved-names-1.cpp new file mode 100644 index 0000000000000..fd636ab0630a2 --- /dev/null +++ b/clang/test/Modules/reserved-names-1.cpp @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s + +// expected-note@1 15{{add 'module;' to the start of the file to introduce a global module fragment}} + +module std; // expected-error {{'std' is a reserved name for a module}} +module _Test; // expected-error {{'_Test' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the 
translation unit}} +module module; // expected-error {{'module' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +module std0; // expected-error {{'std0' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} + +export module module; // expected-error {{'module' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module import; // expected-error {{'import' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module _Test; // expected-error {{'_Test' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module __test; // expected-error {{'__test' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module te__st; // expected-error {{'te__st' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std; // expected-error {{'std' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std.foo;// expected-error {{'std' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std0; // expected-error {{'std0' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std1000000; // expected-error {{'std1000000' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module should_fail._Test; // expected-error {{'_Test' is a 
reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} + +// Show that being in a system header doesn't save you from diagnostics about +// use of an invalid module-name identifier. +# 34 "reserved-names-1.cpp" 1 3 +export module module; // expected-error {{'module' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} + +export module _Test.import; // expected-error {{'import' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +# 39 "reserved-names-1.cpp" 2 3 + +// We can still use a reserved name on import. +import std; // expected-error {{module 'std' not found}} diff --git a/clang/test/Modules/reserved-names-2.cpp b/clang/test/Modules/reserved-names-2.cpp new file mode 100644 index 0000000000000..6979e92f37765 --- /dev/null +++ b/clang/test/Modules/reserved-names-2.cpp @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Demonstrate that we don't consider use of 'std' followed by digits to be a +// reserved identifier if it is not the first part of the path. +export module should_succeed.std0; diff --git a/clang/test/Modules/reserved-names-3.cpp b/clang/test/Modules/reserved-names-3.cpp new file mode 100644 index 0000000000000..b2e155e8d3610 --- /dev/null +++ b/clang/test/Modules/reserved-names-3.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Demonstrate that we don't consider use of 'std' (potentially followed by +// zero or more digits) to be a reserved identifier if it is not the only part +// of the path. 
+export module std12Three; diff --git a/clang/test/Modules/reserved-names-4.cpp b/clang/test/Modules/reserved-names-4.cpp new file mode 100644 index 0000000000000..73df48b76de8d --- /dev/null +++ b/clang/test/Modules/reserved-names-4.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Demonstrate that we don't consider use of 'std' a reserved identifier if it +// is not the first part of the path. +export module should_succeed.std; + diff --git a/clang/test/Modules/reserved-names-system-header-1.cpp b/clang/test/Modules/reserved-names-system-header-1.cpp new file mode 100644 index 0000000000000..2db4c08add1d9 --- /dev/null +++ b/clang/test/Modules/reserved-names-system-header-1.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Show that we suppress the reserved identifier diagnostic in a system header. +# 100 "file.cpp" 1 3 // Enter a system header +export module std; +# 100 "file.cpp" 2 3 // Leave the system header diff --git a/clang/test/Modules/reserved-names-system-header-2.cpp b/clang/test/Modules/reserved-names-system-header-2.cpp new file mode 100644 index 0000000000000..2087f487721cb --- /dev/null +++ b/clang/test/Modules/reserved-names-system-header-2.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Show that we suppress the reserved identifier diagnostic in a system header. +# 100 "file.cpp" 1 3 // Enter a system header +export module __test; +# 100 "file.cpp" 2 3 // Leave the system header From 8b015b2078551c52a6811f58e5c4919bb8f9ff5a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 2 Nov 2022 14:29:24 -0700 Subject: [PATCH 135/516] [SLP][NFC]Formatting and reduce number of iterations, NFC. 
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index be3f560ead854..8a44c4d143506 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3996,7 +3996,7 @@ static bool isRepeatedNonIdentityClusteredMask(ArrayRef Mask, ArrayRef FirstCluster = Mask.slice(0, Sz); if (ShuffleVectorInst::isIdentityMask(FirstCluster)) return false; - for (unsigned I = 0, E = Mask.size(); I < E; I += Sz) { + for (unsigned I = Sz, E = Mask.size(); I < E; I += Sz) { ArrayRef Cluster = Mask.slice(I, Sz); if (Cluster != FirstCluster) return false; @@ -4017,8 +4017,8 @@ void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { // Try to improve gathered nodes with clustered reuses, if possible. reorderScalars(TE.Scalars, makeArrayRef(TE.ReuseShuffleIndices).slice(0, Sz)); // Fill the reuses mask with the identity submasks. - for (auto It = TE.ReuseShuffleIndices.begin(), - End = TE.ReuseShuffleIndices.end(); + for (auto *It = TE.ReuseShuffleIndices.begin(), + *End = TE.ReuseShuffleIndices.end(); It != End; std::advance(It, Sz)) std::iota(It, std::next(It + Sz), 0); } From f090e3c00fc31b99049a2291ebd07075fbdf0be1 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 3 Nov 2022 05:30:41 -0700 Subject: [PATCH 136/516] [SLP]Fix write after bounds. Need to use comma instead of + symbol to prevent writing after bounds. 
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8a44c4d143506..f874cfca2876c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4020,7 +4020,7 @@ void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { for (auto *It = TE.ReuseShuffleIndices.begin(), *End = TE.ReuseShuffleIndices.end(); It != End; std::advance(It, Sz)) - std::iota(It, std::next(It + Sz), 0); + std::iota(It, std::next(It, Sz), 0); } void BoUpSLP::reorderTopToBottom() { From 964c4948cd4adb8e5d436796b02eb4bcd0b9641f Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 3 Nov 2022 13:57:46 +0100 Subject: [PATCH 137/516] Remove an unused local variable, NFC. --- clang/lib/Sema/SemaModule.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 19e2c206375bd..1db716e77e7d5 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -155,7 +155,6 @@ static bool DiagReservedModuleName(Sema &S, const IdentifierInfo *II, Reserved = 1, } Reason = Valid; - StringRef PartName = II->getName(); if (II->isStr("module") || II->isStr("import")) Reason = Invalid; else if (II->isReserved(S.getLangOpts()) != From 592a96c03b0c587404e78d69bbf072609b1e6417 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 13:08:14 +0100 Subject: [PATCH 138/516] [SimplifyCFG] Extract code for tracking ephemeral values (NFC) To allow reusing this in more places in SimplifyCFG. 
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 40 ++++++++++++++++------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index fcdd85838340d..bf0eca555014d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2635,6 +2635,32 @@ static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { return Changed; } +namespace { +/// Track ephemeral values, which should be ignored for cost-modelling +/// purposes. Requires walking instructions in reverse order. +class EphemeralValueTracker { + SmallPtrSet EphValues; + + bool isEphemeral(const Instruction *I) { + if (isa(I)) + return true; + return !I->mayHaveSideEffects() && !I->isTerminator() && + all_of(I->users(), [&](const User *U) { + return EphValues.count(cast(U)); + }); + } + +public: + bool track(const Instruction *I) { + if (isEphemeral(I)) { + EphValues.insert(I); + return true; + } + return false; + } +}; +} // namespace + /// Determine if we can hoist sink a sole store instruction out of a /// conditional block. /// @@ -3002,15 +3028,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, /// Return true if we can thread a branch across this block. static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { int Size = 0; - - SmallPtrSet EphValues; - auto IsEphemeral = [&](const Instruction *I) { - if (isa(I)) - return true; - return !I->mayHaveSideEffects() && !I->isTerminator() && - all_of(I->users(), - [&](const User *U) { return EphValues.count(U); }); - }; + EphemeralValueTracker EphTracker; // Walk the loop in reverse so that we can identify ephemeral values properly // (values only feeding assumes). @@ -3021,11 +3039,9 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { return false; // Ignore ephemeral values which are deleted during codegen. 
- if (IsEphemeral(&I)) - EphValues.insert(&I); // We will delete Phis while threading, so Phis should not be accounted in // block's size. - else if (!isa(I)) { + if (!EphTracker.track(&I) && !isa(I)) { if (Size++ > MaxSmallBlockSize) return false; // Don't clone large BB's. } From 9df924a634ac5ea702b0d8d0d8b737c819a98095 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Wed, 2 Nov 2022 09:55:54 -0500 Subject: [PATCH 139/516] [PowerPC] Add new DMR register classes to Future CPU. A new register class as well as a number of related subregisters are being added to Future CPU. These registers are Dense Math Registers (DMR) and are 1024 bits long. These registers can also be used in consecutive pairs which leads to a register that is 2048 bits. This patch also adds 7 new instructions that use these registers. More instructions will be added in future patches. Reviewed By: amyk, saghir Differential Revision: https://reviews.llvm.org/D136366 --- llvm/include/llvm/CodeGen/ValueTypes.td | 361 +++++++++--------- llvm/include/llvm/IR/Intrinsics.td | 1 + llvm/include/llvm/Support/MachineValueType.h | 360 ++++++++--------- llvm/lib/CodeGen/ValueTypes.cpp | 2 + .../Target/PowerPC/AsmParser/PPCAsmParser.cpp | 85 ++++- .../PowerPC/Disassembler/PPCDisassembler.cpp | 41 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 31 +- llvm/lib/Target/PowerPC/PPCInstrFuture.td | 14 + llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 116 ++++++ llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2 + llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 28 +- llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 46 +++ llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td | 164 ++++++++ llvm/lib/Target/PowerPC/PPCScheduleP9.td | 9 +- .../CodeGen/PowerPC/future-check-features.ll | 14 +- .../PowerPC/ppc-encoding-ISAFuture.txt | 41 ++ .../PowerPC/ppc64le-encoding-ISAFuture.txt | 35 ++ llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s | 50 +++ llvm/utils/TableGen/CodeGenTarget.cpp | 1 + 19 files changed, 1034 insertions(+), 367 deletions(-) 
create mode 100644 llvm/lib/Target/PowerPC/PPCInstrFuture.td create mode 100644 llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td create mode 100644 llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td create mode 100644 llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt create mode 100644 llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt create mode 100644 llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 8ed27f1a0c568..9fc145551880f 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -47,186 +47,187 @@ def v128i1 : ValueType<128, 24>; // 128 x i1 vector value def v256i1 : ValueType<256, 25>; // 256 x i1 vector value def v512i1 : ValueType<512, 26>; // 512 x i1 vector value def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value - -def v128i2 : ValueType<256, 28>; // 128 x i2 vector value -def v256i2 : ValueType<512, 29>; // 256 x i2 vector value - -def v64i4 : ValueType<256, 30>; // 64 x i4 vector value -def v128i4 : ValueType<512, 31>; // 128 x i4 vector value - -def v1i8 : ValueType<8, 32>; // 1 x i8 vector value -def v2i8 : ValueType<16, 33>; // 2 x i8 vector value -def v4i8 : ValueType<32, 34>; // 4 x i8 vector value -def v8i8 : ValueType<64, 35>; // 8 x i8 vector value -def v16i8 : ValueType<128, 36>; // 16 x i8 vector value -def v32i8 : ValueType<256, 37>; // 32 x i8 vector value -def v64i8 : ValueType<512, 38>; // 64 x i8 vector value -def v128i8 : ValueType<1024, 39>; // 128 x i8 vector value -def v256i8 : ValueType<2048, 40>; // 256 x i8 vector value -def v512i8 : ValueType<4096, 41>; // 512 x i8 vector value -def v1024i8 : ValueType<8192, 42>; // 1024 x i8 vector value - -def v1i16 : ValueType<16, 43>; // 1 x i16 vector value -def v2i16 : ValueType<32, 44>; // 2 x i16 vector value -def v3i16 : ValueType<48, 45>; // 3 x i16 vector value -def v4i16 : ValueType<64, 46>; // 4 x i16 vector value 
-def v8i16 : ValueType<128, 47>; // 8 x i16 vector value -def v16i16 : ValueType<256, 48>; // 16 x i16 vector value -def v32i16 : ValueType<512, 49>; // 32 x i16 vector value -def v64i16 : ValueType<1024, 50>; // 64 x i16 vector value -def v128i16 : ValueType<2048, 51>; // 128 x i16 vector value -def v256i16 : ValueType<4096, 52>; // 256 x i16 vector value -def v512i16 : ValueType<8192, 53>; // 512 x i16 vector value - -def v1i32 : ValueType<32, 54>; // 1 x i32 vector value -def v2i32 : ValueType<64, 55>; // 2 x i32 vector value -def v3i32 : ValueType<96, 56>; // 3 x i32 vector value -def v4i32 : ValueType<128, 57>; // 4 x i32 vector value -def v5i32 : ValueType<160, 58>; // 5 x i32 vector value -def v6i32 : ValueType<192, 59>; // 6 x f32 vector value -def v7i32 : ValueType<224, 60>; // 7 x f32 vector value -def v8i32 : ValueType<256, 61>; // 8 x i32 vector value -def v16i32 : ValueType<512, 62>; // 16 x i32 vector value -def v32i32 : ValueType<1024, 63>; // 32 x i32 vector value -def v64i32 : ValueType<2048, 64>; // 64 x i32 vector value -def v128i32 : ValueType<4096, 65>; // 128 x i32 vector value -def v256i32 : ValueType<8192, 66>; // 256 x i32 vector value -def v512i32 : ValueType<16384, 67>; // 512 x i32 vector value -def v1024i32 : ValueType<32768, 68>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536, 69>; // 2048 x i32 vector value - -def v1i64 : ValueType<64, 70>; // 1 x i64 vector value -def v2i64 : ValueType<128, 71>; // 2 x i64 vector value -def v3i64 : ValueType<192, 72>; // 3 x i64 vector value -def v4i64 : ValueType<256, 73>; // 4 x i64 vector value -def v8i64 : ValueType<512, 74>; // 8 x i64 vector value -def v16i64 : ValueType<1024, 75>; // 16 x i64 vector value -def v32i64 : ValueType<2048, 76>; // 32 x i64 vector value -def v64i64 : ValueType<4096, 77>; // 64 x i64 vector value -def v128i64 : ValueType<8192, 78>; // 128 x i64 vector value -def v256i64 : ValueType<16384, 79>; // 256 x i64 vector value - -def v1i128 : ValueType<128, 80>; 
// 1 x i128 vector value - -def v1f16 : ValueType<16, 81>; // 1 x f16 vector value -def v2f16 : ValueType<32, 82>; // 2 x f16 vector value -def v3f16 : ValueType<48, 83>; // 3 x f16 vector value -def v4f16 : ValueType<64, 84>; // 4 x f16 vector value -def v8f16 : ValueType<128, 85>; // 8 x f16 vector value -def v16f16 : ValueType<256, 86>; // 16 x f16 vector value -def v32f16 : ValueType<512, 87>; // 32 x f16 vector value -def v64f16 : ValueType<1024, 88>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 89>; // 128 x f16 vector value -def v256f16 : ValueType<4096, 90>; // 256 x f16 vector value -def v512f16 : ValueType<8192, 91>; // 512 x f16 vector value - -def v2bf16 : ValueType<32, 92>; // 2 x bf16 vector value -def v3bf16 : ValueType<48, 93>; // 3 x bf16 vector value -def v4bf16 : ValueType<64, 94>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 95>; // 8 x bf16 vector value -def v16bf16 : ValueType<256, 96>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 97>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 98>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 99>; // 128 x bf16 vector value - -def v1f32 : ValueType<32, 100>; // 1 x f32 vector value -def v2f32 : ValueType<64, 101>; // 2 x f32 vector value -def v3f32 : ValueType<96, 102>; // 3 x f32 vector value -def v4f32 : ValueType<128, 103>; // 4 x f32 vector value -def v5f32 : ValueType<160, 104>; // 5 x f32 vector value -def v6f32 : ValueType<192, 105>; // 6 x f32 vector value -def v7f32 : ValueType<224, 106>; // 7 x f32 vector value -def v8f32 : ValueType<256, 107>; // 8 x f32 vector value -def v16f32 : ValueType<512, 108>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 109>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 110>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 111>; // 128 x f32 vector value -def v256f32 : ValueType<8192, 112>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 113>; // 512 x f32 vector value -def 
v1024f32 : ValueType<32768, 114>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 115>; // 2048 x f32 vector value - -def v1f64 : ValueType<64, 116>; // 1 x f64 vector value -def v2f64 : ValueType<128, 117>; // 2 x f64 vector value -def v3f64 : ValueType<192, 118>; // 3 x f64 vector value -def v4f64 : ValueType<256, 119>; // 4 x f64 vector value -def v8f64 : ValueType<512, 120>; // 8 x f64 vector value -def v16f64 : ValueType<1024, 121>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 122>; // 32 x f64 vector value -def v64f64 : ValueType<4096, 123>; // 64 x f64 vector value -def v128f64 : ValueType<8192, 124>; // 128 x f64 vector value -def v256f64 : ValueType<16384, 125>; // 256 x f64 vector value - -def nxv1i1 : ValueType<1, 126>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 127>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 128>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 129>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 130>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 131>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64, 132>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 133>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 134>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 135>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 136>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 137>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 138>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 139>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 140>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 141>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 142>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 143>; // n x 8 x i16 vector value -def nxv16i16 : ValueType<256, 144>; // n x 16 x i16 vector value -def nxv32i16 : ValueType<512, 145>; // n x 32 x i16 vector value - -def nxv1i32 : 
ValueType<32, 146>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 147>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 148>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 149>; // n x 8 x i32 vector value -def nxv16i32 : ValueType<512, 150>; // n x 16 x i32 vector value -def nxv32i32 : ValueType<1024, 151>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 152>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 153>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 154>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 155>; // n x 8 x i64 vector value -def nxv16i64 : ValueType<1024, 156>; // n x 16 x i64 vector value -def nxv32i64 : ValueType<2048, 157>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<16, 158>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32, 159>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64, 160>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 161>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256, 162>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512, 163>; // n x 32 x f16 vector value - -def nxv1bf16 : ValueType<16, 164>; // n x 1 x bf16 vector value -def nxv2bf16 : ValueType<32, 165>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64, 166>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 167>; // n x 8 x bf16 vector value -def nxv16bf16 : ValueType<256, 168>; // n x 16 x bf16 vector value -def nxv32bf16 : ValueType<512, 169>; // n x 32 x bf16 vector value - -def nxv1f32 : ValueType<32, 170>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64, 171>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 172>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 173>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 174>; // n x 16 x f32 vector value - -def nxv1f64 : ValueType<64, 175>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 176>; // n x 2 x f64 vector value 
-def nxv4f64 : ValueType<256, 177>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 178>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 179>; // X86 MMX value -def FlagVT : ValueType<0, 180>; // Pre-RA sched glue -def isVoid : ValueType<0, 181>; // Produces no value -def untyped : ValueType<8, 182>; // Produces an untyped value -def funcref : ValueType<0, 183>; // WebAssembly's funcref type -def externref : ValueType<0, 184>; // WebAssembly's externref type -def x86amx : ValueType<8192, 185>; // X86 AMX value -def i64x8 : ValueType<512, 186>; // 8 Consecutive GPRs (AArch64) +def v2048i1 : ValueType<2048, 28>; // 2048 x i1 vector value + +def v128i2 : ValueType<256, 29>; // 128 x i2 vector value +def v256i2 : ValueType<512, 30>; // 256 x i2 vector value + +def v64i4 : ValueType<256, 31>; // 64 x i4 vector value +def v128i4 : ValueType<512, 32>; // 128 x i4 vector value + +def v1i8 : ValueType<8, 33>; // 1 x i8 vector value +def v2i8 : ValueType<16, 34>; // 2 x i8 vector value +def v4i8 : ValueType<32, 35>; // 4 x i8 vector value +def v8i8 : ValueType<64, 36>; // 8 x i8 vector value +def v16i8 : ValueType<128, 37>; // 16 x i8 vector value +def v32i8 : ValueType<256, 38>; // 32 x i8 vector value +def v64i8 : ValueType<512, 39>; // 64 x i8 vector value +def v128i8 : ValueType<1024, 40>; // 128 x i8 vector value +def v256i8 : ValueType<2048, 41>; // 256 x i8 vector value +def v512i8 : ValueType<4096, 42>; // 512 x i8 vector value +def v1024i8 : ValueType<8192, 43>; // 1024 x i8 vector value + +def v1i16 : ValueType<16, 44>; // 1 x i16 vector value +def v2i16 : ValueType<32, 45>; // 2 x i16 vector value +def v3i16 : ValueType<48, 46>; // 3 x i16 vector value +def v4i16 : ValueType<64, 47>; // 4 x i16 vector value +def v8i16 : ValueType<128, 48>; // 8 x i16 vector value +def v16i16 : ValueType<256, 49>; // 16 x i16 vector value +def v32i16 : ValueType<512, 50>; // 32 x i16 vector value +def v64i16 : ValueType<1024, 51>; // 64 x i16 vector value +def 
v128i16 : ValueType<2048, 52>; // 128 x i16 vector value +def v256i16 : ValueType<4096, 53>; // 256 x i16 vector value +def v512i16 : ValueType<8192, 54>; // 512 x i16 vector value + +def v1i32 : ValueType<32, 55>; // 1 x i32 vector value +def v2i32 : ValueType<64, 56>; // 2 x i32 vector value +def v3i32 : ValueType<96, 57>; // 3 x i32 vector value +def v4i32 : ValueType<128, 58>; // 4 x i32 vector value +def v5i32 : ValueType<160, 59>; // 5 x i32 vector value +def v6i32 : ValueType<192, 60>; // 6 x f32 vector value +def v7i32 : ValueType<224, 61>; // 7 x f32 vector value +def v8i32 : ValueType<256, 62>; // 8 x i32 vector value +def v16i32 : ValueType<512, 63>; // 16 x i32 vector value +def v32i32 : ValueType<1024, 64>; // 32 x i32 vector value +def v64i32 : ValueType<2048, 65>; // 64 x i32 vector value +def v128i32 : ValueType<4096, 66>; // 128 x i32 vector value +def v256i32 : ValueType<8192, 67>; // 256 x i32 vector value +def v512i32 : ValueType<16384, 68>; // 512 x i32 vector value +def v1024i32 : ValueType<32768, 69>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536, 70>; // 2048 x i32 vector value + +def v1i64 : ValueType<64, 71>; // 1 x i64 vector value +def v2i64 : ValueType<128, 72>; // 2 x i64 vector value +def v3i64 : ValueType<192, 73>; // 3 x i64 vector value +def v4i64 : ValueType<256, 74>; // 4 x i64 vector value +def v8i64 : ValueType<512, 75>; // 8 x i64 vector value +def v16i64 : ValueType<1024, 76>; // 16 x i64 vector value +def v32i64 : ValueType<2048, 77>; // 32 x i64 vector value +def v64i64 : ValueType<4096, 78>; // 64 x i64 vector value +def v128i64 : ValueType<8192, 79>; // 128 x i64 vector value +def v256i64 : ValueType<16384, 80>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 81>; // 1 x i128 vector value + +def v1f16 : ValueType<16, 82>; // 1 x f16 vector value +def v2f16 : ValueType<32, 83>; // 2 x f16 vector value +def v3f16 : ValueType<48, 84>; // 3 x f16 vector value +def v4f16 : ValueType<64, 85>; // 4 x f16 
vector value +def v8f16 : ValueType<128, 86>; // 8 x f16 vector value +def v16f16 : ValueType<256, 87>; // 16 x f16 vector value +def v32f16 : ValueType<512, 88>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 89>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 90>; // 128 x f16 vector value +def v256f16 : ValueType<4096, 91>; // 256 x f16 vector value +def v512f16 : ValueType<8192, 92>; // 512 x f16 vector value + +def v2bf16 : ValueType<32, 93>; // 2 x bf16 vector value +def v3bf16 : ValueType<48, 94>; // 3 x bf16 vector value +def v4bf16 : ValueType<64, 95>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 96>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 97>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 98>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 99>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 100>; // 128 x bf16 vector value + +def v1f32 : ValueType<32, 101>; // 1 x f32 vector value +def v2f32 : ValueType<64, 102>; // 2 x f32 vector value +def v3f32 : ValueType<96, 103>; // 3 x f32 vector value +def v4f32 : ValueType<128, 104>; // 4 x f32 vector value +def v5f32 : ValueType<160, 105>; // 5 x f32 vector value +def v6f32 : ValueType<192, 106>; // 6 x f32 vector value +def v7f32 : ValueType<224, 107>; // 7 x f32 vector value +def v8f32 : ValueType<256, 108>; // 8 x f32 vector value +def v16f32 : ValueType<512, 109>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 110>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 111>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 112>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 113>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 114>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 115>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 116>; // 2048 x f32 vector value + +def v1f64 : ValueType<64, 117>; // 1 x f64 vector value +def v2f64 : ValueType<128, 118>; // 2 x f64 vector 
value +def v3f64 : ValueType<192, 119>; // 3 x f64 vector value +def v4f64 : ValueType<256, 120>; // 4 x f64 vector value +def v8f64 : ValueType<512, 121>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 122>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 123>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 124>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 125>; // 128 x f64 vector value +def v256f64 : ValueType<16384, 126>; // 256 x f64 vector value + +def nxv1i1 : ValueType<1, 127>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 128>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 129>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 130>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 131>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 132>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64, 133>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 134>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 135>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 136>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 137>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 138>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 139>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 140>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 141>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 142>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 143>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 144>; // n x 8 x i16 vector value +def nxv16i16 : ValueType<256, 145>; // n x 16 x i16 vector value +def nxv32i16 : ValueType<512, 146>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 147>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 148>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 149>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 150>; // n x 8 x i32 vector 
value +def nxv16i32 : ValueType<512, 151>; // n x 16 x i32 vector value +def nxv32i32 : ValueType<1024, 152>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 153>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 154>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 155>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 156>; // n x 8 x i64 vector value +def nxv16i64 : ValueType<1024, 157>; // n x 16 x i64 vector value +def nxv32i64 : ValueType<2048, 158>; // n x 32 x i64 vector value + +def nxv1f16 : ValueType<16, 159>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32, 160>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64, 161>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 162>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256, 163>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512, 164>; // n x 32 x f16 vector value + +def nxv1bf16 : ValueType<16, 165>; // n x 1 x bf16 vector value +def nxv2bf16 : ValueType<32, 166>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64, 167>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 168>; // n x 8 x bf16 vector value +def nxv16bf16 : ValueType<256, 169>; // n x 16 x bf16 vector value +def nxv32bf16 : ValueType<512, 170>; // n x 32 x bf16 vector value + +def nxv1f32 : ValueType<32, 171>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64, 172>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 173>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 174>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 175>; // n x 16 x f32 vector value + +def nxv1f64 : ValueType<64, 176>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 177>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 178>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 179>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 180>; // X86 MMX value +def FlagVT : ValueType<0, 181>; // Pre-RA sched 
glue +def isVoid : ValueType<0, 182>; // Produces no value +def untyped : ValueType<8, 183>; // Produces an untyped value +def funcref : ValueType<0, 184>; // WebAssembly's funcref type +def externref : ValueType<0, 185>; // WebAssembly's externref type +def x86amx : ValueType<8192, 186>; // X86 AMX value +def i64x8 : ValueType<512, 187>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy def MetadataVT : ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 64d186c508ce8..e8fb5c4aef191 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -281,6 +281,7 @@ def llvm_v128i1_ty : LLVMType; // 128 x i1 def llvm_v256i1_ty : LLVMType; // 256 x i1 def llvm_v512i1_ty : LLVMType; // 512 x i1 def llvm_v1024i1_ty : LLVMType; //1024 x i1 +def llvm_v2048i1_ty : LLVMType; //2048 x i1 def llvm_v1i8_ty : LLVMType; // 1 x i8 def llvm_v2i8_ty : LLVMType; // 2 x i8 diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 3e263a7739532..1fe4128516adc 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -74,118 +74,119 @@ namespace llvm { v256i1 = 25, // 256 x i1 v512i1 = 26, // 512 x i1 v1024i1 = 27, // 1024 x i1 - - v128i2 = 28, // 128 x i2 - v256i2 = 29, // 256 x i2 - - v64i4 = 30, // 64 x i4 - v128i4 = 31, // 128 x i4 - - v1i8 = 32, // 1 x i8 - v2i8 = 33, // 2 x i8 - v4i8 = 34, // 4 x i8 - v8i8 = 35, // 8 x i8 - v16i8 = 36, // 16 x i8 - v32i8 = 37, // 32 x i8 - v64i8 = 38, // 64 x i8 - v128i8 = 39, // 128 x i8 - v256i8 = 40, // 256 x i8 - v512i8 = 41, // 512 x i8 - v1024i8 = 42, // 1024 x i8 - - v1i16 = 43, // 1 x i16 - v2i16 = 44, // 2 x i16 - v3i16 = 45, // 3 x i16 - v4i16 = 46, // 4 x i16 - v8i16 = 47, // 8 x i16 - v16i16 = 48, // 16 x i16 - v32i16 = 49, // 32 x i16 - v64i16 = 50, // 64 x i16 - v128i16 = 51, // 128 x i16 - v256i16 = 52, // 
256 x i16 - v512i16 = 53, // 512 x i16 - - v1i32 = 54, // 1 x i32 - v2i32 = 55, // 2 x i32 - v3i32 = 56, // 3 x i32 - v4i32 = 57, // 4 x i32 - v5i32 = 58, // 5 x i32 - v6i32 = 59, // 6 x i32 - v7i32 = 60, // 7 x i32 - v8i32 = 61, // 8 x i32 - v16i32 = 62, // 16 x i32 - v32i32 = 63, // 32 x i32 - v64i32 = 64, // 64 x i32 - v128i32 = 65, // 128 x i32 - v256i32 = 66, // 256 x i32 - v512i32 = 67, // 512 x i32 - v1024i32 = 68, // 1024 x i32 - v2048i32 = 69, // 2048 x i32 - - v1i64 = 70, // 1 x i64 - v2i64 = 71, // 2 x i64 - v3i64 = 72, // 3 x i64 - v4i64 = 73, // 4 x i64 - v8i64 = 74, // 8 x i64 - v16i64 = 75, // 16 x i64 - v32i64 = 76, // 32 x i64 - v64i64 = 77, // 64 x i64 - v128i64 = 78, // 128 x i64 - v256i64 = 79, // 256 x i64 - - v1i128 = 80, // 1 x i128 + v2048i1 = 28, // 2048 x i1 + + v128i2 = 29, // 128 x i2 + v256i2 = 30, // 256 x i2 + + v64i4 = 31, // 64 x i4 + v128i4 = 32, // 128 x i4 + + v1i8 = 33, // 1 x i8 + v2i8 = 34, // 2 x i8 + v4i8 = 35, // 4 x i8 + v8i8 = 36, // 8 x i8 + v16i8 = 37, // 16 x i8 + v32i8 = 38, // 32 x i8 + v64i8 = 39, // 64 x i8 + v128i8 = 40, // 128 x i8 + v256i8 = 41, // 256 x i8 + v512i8 = 42, // 512 x i8 + v1024i8 = 43, // 1024 x i8 + + v1i16 = 44, // 1 x i16 + v2i16 = 45, // 2 x i16 + v3i16 = 46, // 3 x i16 + v4i16 = 47, // 4 x i16 + v8i16 = 48, // 8 x i16 + v16i16 = 49, // 16 x i16 + v32i16 = 50, // 32 x i16 + v64i16 = 51, // 64 x i16 + v128i16 = 52, // 128 x i16 + v256i16 = 53, // 256 x i16 + v512i16 = 54, // 512 x i16 + + v1i32 = 55, // 1 x i32 + v2i32 = 56, // 2 x i32 + v3i32 = 57, // 3 x i32 + v4i32 = 58, // 4 x i32 + v5i32 = 59, // 5 x i32 + v6i32 = 60, // 6 x i32 + v7i32 = 61, // 7 x i32 + v8i32 = 62, // 8 x i32 + v16i32 = 63, // 16 x i32 + v32i32 = 64, // 32 x i32 + v64i32 = 65, // 64 x i32 + v128i32 = 66, // 128 x i32 + v256i32 = 67, // 256 x i32 + v512i32 = 68, // 512 x i32 + v1024i32 = 69, // 1024 x i32 + v2048i32 = 70, // 2048 x i32 + + v1i64 = 71, // 1 x i64 + v2i64 = 72, // 2 x i64 + v3i64 = 73, // 3 x i64 + v4i64 = 
74, // 4 x i64 + v8i64 = 75, // 8 x i64 + v16i64 = 76, // 16 x i64 + v32i64 = 77, // 32 x i64 + v64i64 = 78, // 64 x i64 + v128i64 = 79, // 128 x i64 + v256i64 = 80, // 256 x i64 + + v1i128 = 81, // 1 x i128 FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - v1f16 = 81, // 1 x f16 - v2f16 = 82, // 2 x f16 - v3f16 = 83, // 3 x f16 - v4f16 = 84, // 4 x f16 - v8f16 = 85, // 8 x f16 - v16f16 = 86, // 16 x f16 - v32f16 = 87, // 32 x f16 - v64f16 = 88, // 64 x f16 - v128f16 = 89, // 128 x f16 - v256f16 = 90, // 256 x f16 - v512f16 = 91, // 512 x f16 - - v2bf16 = 92, // 2 x bf16 - v3bf16 = 93, // 3 x bf16 - v4bf16 = 94, // 4 x bf16 - v8bf16 = 95, // 8 x bf16 - v16bf16 = 96, // 16 x bf16 - v32bf16 = 97, // 32 x bf16 - v64bf16 = 98, // 64 x bf16 - v128bf16 = 99, // 128 x bf16 - - v1f32 = 100, // 1 x f32 - v2f32 = 101, // 2 x f32 - v3f32 = 102, // 3 x f32 - v4f32 = 103, // 4 x f32 - v5f32 = 104, // 5 x f32 - v6f32 = 105, // 6 x f32 - v7f32 = 106, // 7 x f32 - v8f32 = 107, // 8 x f32 - v16f32 = 108, // 16 x f32 - v32f32 = 109, // 32 x f32 - v64f32 = 110, // 64 x f32 - v128f32 = 111, // 128 x f32 - v256f32 = 112, // 256 x f32 - v512f32 = 113, // 512 x f32 - v1024f32 = 114, // 1024 x f32 - v2048f32 = 115, // 2048 x f32 - - v1f64 = 116, // 1 x f64 - v2f64 = 117, // 2 x f64 - v3f64 = 118, // 3 x f64 - v4f64 = 119, // 4 x f64 - v8f64 = 120, // 8 x f64 - v16f64 = 121, // 16 x f64 - v32f64 = 122, // 32 x f64 - v64f64 = 123, // 64 x f64 - v128f64 = 124, // 128 x f64 - v256f64 = 125, // 256 x f64 + v1f16 = 82, // 1 x f16 + v2f16 = 83, // 2 x f16 + v3f16 = 84, // 3 x f16 + v4f16 = 85, // 4 x f16 + v8f16 = 86, // 8 x f16 + v16f16 = 87, // 16 x f16 + v32f16 = 88, // 32 x f16 + v64f16 = 89, // 64 x f16 + v128f16 = 90, // 128 x f16 + v256f16 = 91, // 256 x f16 + v512f16 = 92, // 512 x f16 + + v2bf16 = 93, // 2 x bf16 + v3bf16 = 94, // 3 x bf16 + v4bf16 = 95, // 4 x bf16 + v8bf16 = 96, // 8 x bf16 + v16bf16 = 97, // 16 x bf16 + v32bf16 = 98, 
// 32 x bf16 + v64bf16 = 99, // 64 x bf16 + v128bf16 = 100, // 128 x bf16 + + v1f32 = 101, // 1 x f32 + v2f32 = 102, // 2 x f32 + v3f32 = 103, // 3 x f32 + v4f32 = 104, // 4 x f32 + v5f32 = 105, // 5 x f32 + v6f32 = 106, // 6 x f32 + v7f32 = 107, // 7 x f32 + v8f32 = 108, // 8 x f32 + v16f32 = 109, // 16 x f32 + v32f32 = 110, // 32 x f32 + v64f32 = 111, // 64 x f32 + v128f32 = 112, // 128 x f32 + v256f32 = 113, // 256 x f32 + v512f32 = 114, // 512 x f32 + v1024f32 = 115, // 1024 x f32 + v2048f32 = 116, // 2048 x f32 + + v1f64 = 117, // 1 x f64 + v2f64 = 118, // 2 x f64 + v3f64 = 119, // 3 x f64 + v4f64 = 120, // 4 x f64 + v8f64 = 121, // 8 x f64 + v16f64 = 122, // 16 x f64 + v32f64 = 123, // 32 x f64 + v64f64 = 124, // 64 x f64 + v128f64 = 125, // 128 x f64 + v256f64 = 126, // 256 x f64 FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16, LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64, @@ -193,70 +194,70 @@ namespace llvm { FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64, - nxv1i1 = 126, // n x 1 x i1 - nxv2i1 = 127, // n x 2 x i1 - nxv4i1 = 128, // n x 4 x i1 - nxv8i1 = 129, // n x 8 x i1 - nxv16i1 = 130, // n x 16 x i1 - nxv32i1 = 131, // n x 32 x i1 - nxv64i1 = 132, // n x 64 x i1 - - nxv1i8 = 133, // n x 1 x i8 - nxv2i8 = 134, // n x 2 x i8 - nxv4i8 = 135, // n x 4 x i8 - nxv8i8 = 136, // n x 8 x i8 - nxv16i8 = 137, // n x 16 x i8 - nxv32i8 = 138, // n x 32 x i8 - nxv64i8 = 139, // n x 64 x i8 - - nxv1i16 = 140, // n x 1 x i16 - nxv2i16 = 141, // n x 2 x i16 - nxv4i16 = 142, // n x 4 x i16 - nxv8i16 = 143, // n x 8 x i16 - nxv16i16 = 144, // n x 16 x i16 - nxv32i16 = 145, // n x 32 x i16 - - nxv1i32 = 146, // n x 1 x i32 - nxv2i32 = 147, // n x 2 x i32 - nxv4i32 = 148, // n x 4 x i32 - nxv8i32 = 149, // n x 8 x i32 - nxv16i32 = 150, // n x 16 x i32 - nxv32i32 = 151, // n x 32 x i32 - - nxv1i64 = 152, // n x 1 x i64 - nxv2i64 = 153, // n x 2 x i64 - nxv4i64 = 154, // n x 4 x i64 - nxv8i64 = 155, // n x 8 x i64 - nxv16i64 = 156, // n x 16 x 
i64 - nxv32i64 = 157, // n x 32 x i64 + nxv1i1 = 127, // n x 1 x i1 + nxv2i1 = 128, // n x 2 x i1 + nxv4i1 = 129, // n x 4 x i1 + nxv8i1 = 130, // n x 8 x i1 + nxv16i1 = 131, // n x 16 x i1 + nxv32i1 = 132, // n x 32 x i1 + nxv64i1 = 133, // n x 64 x i1 + + nxv1i8 = 134, // n x 1 x i8 + nxv2i8 = 135, // n x 2 x i8 + nxv4i8 = 136, // n x 4 x i8 + nxv8i8 = 137, // n x 8 x i8 + nxv16i8 = 138, // n x 16 x i8 + nxv32i8 = 139, // n x 32 x i8 + nxv64i8 = 140, // n x 64 x i8 + + nxv1i16 = 141, // n x 1 x i16 + nxv2i16 = 142, // n x 2 x i16 + nxv4i16 = 143, // n x 4 x i16 + nxv8i16 = 144, // n x 8 x i16 + nxv16i16 = 145, // n x 16 x i16 + nxv32i16 = 146, // n x 32 x i16 + + nxv1i32 = 147, // n x 1 x i32 + nxv2i32 = 148, // n x 2 x i32 + nxv4i32 = 149, // n x 4 x i32 + nxv8i32 = 150, // n x 8 x i32 + nxv16i32 = 151, // n x 16 x i32 + nxv32i32 = 152, // n x 32 x i32 + + nxv1i64 = 153, // n x 1 x i64 + nxv2i64 = 154, // n x 2 x i64 + nxv4i64 = 155, // n x 4 x i64 + nxv8i64 = 156, // n x 8 x i64 + nxv16i64 = 157, // n x 16 x i64 + nxv32i64 = 158, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv1f16 = 158, // n x 1 x f16 - nxv2f16 = 159, // n x 2 x f16 - nxv4f16 = 160, // n x 4 x f16 - nxv8f16 = 161, // n x 8 x f16 - nxv16f16 = 162, // n x 16 x f16 - nxv32f16 = 163, // n x 32 x f16 - - nxv1bf16 = 164, // n x 1 x bf16 - nxv2bf16 = 165, // n x 2 x bf16 - nxv4bf16 = 166, // n x 4 x bf16 - nxv8bf16 = 167, // n x 8 x bf16 - nxv16bf16 = 168, // n x 16 x bf16 - nxv32bf16 = 169, // n x 32 x bf16 - - nxv1f32 = 170, // n x 1 x f32 - nxv2f32 = 171, // n x 2 x f32 - nxv4f32 = 172, // n x 4 x f32 - nxv8f32 = 173, // n x 8 x f32 - nxv16f32 = 174, // n x 16 x f32 - - nxv1f64 = 175, // n x 1 x f64 - nxv2f64 = 176, // n x 2 x f64 - nxv4f64 = 177, // n x 4 x f64 - nxv8f64 = 178, // n x 8 x f64 + nxv1f16 = 159, // n x 1 x f16 + nxv2f16 = 160, // n x 2 x f16 + nxv4f16 = 161, // n x 4 x f16 + nxv8f16 = 162, // n x 8 x f16 + 
nxv16f16 = 163, // n x 16 x f16 + nxv32f16 = 164, // n x 32 x f16 + + nxv1bf16 = 165, // n x 1 x bf16 + nxv2bf16 = 166, // n x 2 x bf16 + nxv4bf16 = 167, // n x 4 x bf16 + nxv8bf16 = 168, // n x 8 x bf16 + nxv16bf16 = 169, // n x 16 x bf16 + nxv32bf16 = 170, // n x 32 x bf16 + + nxv1f32 = 171, // n x 1 x f32 + nxv2f32 = 172, // n x 2 x f32 + nxv4f32 = 173, // n x 4 x f32 + nxv8f32 = 174, // n x 8 x f32 + nxv16f32 = 175, // n x 16 x f32 + + nxv1f64 = 176, // n x 1 x f64 + nxv2f64 = 177, // n x 2 x f64 + nxv4f64 = 178, // n x 4 x f64 + nxv8f64 = 179, // n x 8 x f64 FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, @@ -267,20 +268,20 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 179, // This is an X86 MMX value + x86mmx = 180, // This is an X86 MMX value - Glue = 180, // This glues nodes together during pre-RA sched + Glue = 181, // This glues nodes together during pre-RA sched - isVoid = 181, // This has no value + isVoid = 182, // This has no value - Untyped = 182, // This value takes a register, but has + Untyped = 183, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - funcref = 183, // WebAssembly's funcref type - externref = 184, // WebAssembly's externref type - x86amx = 185, // This is an X86 AMX value - i64x8 = 186, // 8 Consecutive GPRs (AArch64) + funcref = 184, // WebAssembly's funcref type + externref = 185, // WebAssembly's externref type + x86amx = 186, // This is an X86 AMX value + i64x8 = 187, // 8 Consecutive GPRs (AArch64) FIRST_VALUETYPE = 1, // This is always the beginning of the list. LAST_VALUETYPE = i64x8, // This always remains at the end of the list. 
@@ -456,7 +457,8 @@ namespace llvm { return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 || SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64 || SimpleTy == MVT::v128f16 || SimpleTy == MVT::v64f32 || - SimpleTy == MVT::v32f64 || SimpleTy == MVT::v128bf16); + SimpleTy == MVT::v32f64 || SimpleTy == MVT::v128bf16 || + SimpleTy == MVT::v2048i1); } /// Return true if this is an overloaded type for TableGen. @@ -544,6 +546,7 @@ namespace llvm { case v256i1: case v512i1: case v1024i1: + case v2048i1: case nxv1i1: case nxv2i1: case nxv4i1: @@ -704,6 +707,7 @@ namespace llvm { switch (SimpleTy) { default: llvm_unreachable("Not a vector MVT!"); + case v2048i1: case v2048i32: case v2048f32: return 2048; case v1024i1: @@ -1054,6 +1058,7 @@ namespace llvm { case v16f64: return TypeSize::Fixed(1024); case nxv32i32: case nxv16i64: return TypeSize::Scalable(1024); + case v2048i1: case v256i8: case v128i16: case v64i32: @@ -1239,6 +1244,7 @@ namespace llvm { if (NumElements == 256) return MVT::v256i1; if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; + if (NumElements == 2048) return MVT::v2048i1; break; case MVT::i2: if (NumElements == 128) return MVT::v128i2; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 1bc03f0dfd833..cea97a30828ef 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -232,6 +232,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1024); + case MVT::v2048i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 2048); case MVT::v128i2: return FixedVectorType::get(Type::getIntNTy(Context, 2), 128); case MVT::v256i2: diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 915f689ac688c..e7f7c0cd32ed8 100644 --- 
a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -29,7 +29,7 @@ using namespace llvm; -DEFINE_PPC_REGCLASSES; +DEFINE_PPC_REGCLASSES // Evaluate an expression containing condition register // or condition register field symbols. Returns positive @@ -291,6 +291,26 @@ struct PPCOperand : public MCParsedAsmOperand { return (unsigned) Imm.Val; } + unsigned getDMRROWReg() const { + assert(isDMRROWRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + + unsigned getDMRROWpReg() const { + assert(isDMRROWpRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + + unsigned getDMRReg() const { + assert(isDMRRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + + unsigned getDMRpReg() const { + assert(isDMRpRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + unsigned getVSRpEvenReg() const { assert(isVSRpEvenRegNumber() && "Invalid access!"); return (unsigned) Imm.Val >> 1; @@ -391,6 +411,18 @@ struct PPCOperand : public MCParsedAsmOperand { bool isACCRegNumber() const { return Kind == Immediate && isUInt<3>(getImm()); } + bool isDMRROWRegNumber() const { + return Kind == Immediate && isUInt<6>(getImm()); + } + bool isDMRROWpRegNumber() const { + return Kind == Immediate && isUInt<5>(getImm()); + } + bool isDMRRegNumber() const { + return Kind == Immediate && isUInt<3>(getImm()); + } + bool isDMRpRegNumber() const { + return Kind == Immediate && isUInt<2>(getImm()); + } bool isVSRpEvenRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()) && ((getImm() & 1) == 0); } @@ -507,6 +539,36 @@ struct PPCOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createReg(ACCRegs[getACCReg()])); } + void addRegDMRROWRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRROWRegs[getDMRROWReg()])); + } + + void addRegDMRROWpRCOperands(MCInst &Inst, unsigned N) const 
{ + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRROWpRegs[getDMRROWpReg()])); + } + + void addRegDMRRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRRegs[getDMRReg()])); + } + + void addRegDMRpRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRpRegs[getDMRpReg()])); + } + + void addRegWACCRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(WACCRegs[getACCReg()])); + } + + void addRegWACC_HIRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(WACC_HIRegs[getACCReg()])); + } + void addRegVSRpRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); @@ -1218,6 +1280,27 @@ bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) { } else if (Name.startswith_insensitive("cr") && !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { RegNo = CRRegs[IntVal]; + } else if (Name.startswith_insensitive("acc") && + !Name.substr(3).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = ACCRegs[IntVal]; + } else if (Name.startswith_insensitive("wacc_hi") && + !Name.substr(7).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = WACC_HIRegs[IntVal]; + } else if (Name.startswith_insensitive("wacc") && + !Name.substr(4).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = WACCRegs[IntVal]; + } else if (Name.startswith_insensitive("dmrrowp") && + !Name.substr(7).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = DMRROWpRegs[IntVal]; + } else if (Name.startswith_insensitive("dmrrow") && + !Name.substr(6).getAsInteger(10, IntVal) && IntVal < 64) { + RegNo = DMRROWRegs[IntVal]; + } else if
(Name.startswith_insensitive("dmrp") && + !Name.substr(4).getAsInteger(10, IntVal) && IntVal < 4) { + RegNo = DMRpRegs[IntVal]; + } else if (Name.startswith_insensitive("dmr") && + !Name.substr(3).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = DMRRegs[IntVal]; } else return true; getParser().Lex(); diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index d3d720054f16a..21fee2441f32a 100644 --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -17,7 +17,7 @@ using namespace llvm; -DEFINE_PPC_REGCLASSES; +DEFINE_PPC_REGCLASSES #define DEBUG_TYPE "ppc-disassembler" @@ -187,6 +187,45 @@ static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, ACCRegs); } +static DecodeStatus DecodeWACCRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, WACCRegs); +} + +static DecodeStatus DecodeWACC_HIRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, WACC_HIRegs); +} + +// TODO: Make this function static when the register class is used by a new +// instruction. +DecodeStatus DecodeDMRROWRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRROWRegs); +} + +static DecodeStatus DecodeDMRROWpRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRROWpRegs); +} + +static DecodeStatus DecodeDMRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRRegs); +} + +// TODO: Make this function static when the register class is used by a new +// instruction.
+DecodeStatus DecodeDMRpRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRpRegs); +} + static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 3ca6f394f60b6..e4521aebad7ef 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -136,6 +136,17 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \ } +#define PPC_REGS0_63(X) \ + { \ + X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \ + X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \ + X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31, \ + X##32, X##33, X##34, X##35, X##36, X##37, X##38, X##39, X##40, X##41, \ + X##42, X##43, X##44, X##45, X##46, X##47, X##48, X##49, X##50, X##51, \ + X##52, X##53, X##54, X##55, X##56, X##57, X##58, X##59, X##60, X##61, \ + X##62, X##63 \ + } + #define PPC_REGS_NO0_31(Z, X) \ { \ Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \ @@ -155,6 +166,16 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { HI##28, HI##29, HI##30, HI##31 \ } +#define PPC_REGS0_7(X) \ + { \ + X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7 \ + } + +#define PPC_REGS0_3(X) \ + { \ + X##0, X##1, X##2, X##3 \ + } + using llvm::MCPhysReg; #define DEFINE_PPC_REGCLASSES \ @@ -185,5 +206,13 @@ using llvm::MCPhysReg; PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \ static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \ - static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC) + static const 
MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC); \ + static const MCPhysReg WACCRegs[8] = PPC_REGS0_7(PPC::WACC); \ + static const MCPhysReg WACC_HIRegs[8] = PPC_REGS0_7(PPC::WACC_HI); \ + static const MCPhysReg DMRROWpRegs[32] = PPC_REGS0_31(PPC::DMRROWp); \ + static const MCPhysReg DMRROWRegs[64] = PPC_REGS0_63(PPC::DMRROW); \ + static const MCPhysReg DMRRegs[8] = PPC_REGS0_7(PPC::DMR); \ + static const MCPhysReg DMRpRegs[4] = PPC_REGS0_3(PPC::DMRp); + + #endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td new file mode 100644 index 0000000000000..63b77e46f01f4 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -0,0 +1,14 @@ +//===-- PPCInstrFuture.td - Future Instruction Set --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions introduced for the Future CPU. +// +//===----------------------------------------------------------------------===// + + diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td new file mode 100644 index 0000000000000..4da2969857d55 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -0,0 +1,116 @@ +//===-- PPCInstrFutureMMA.td - Future Instruction Set ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions introduced for the Future CPU for MMA. 
+// +//===----------------------------------------------------------------------===// + +class XX3Form_AT3_XABp5_P1 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, list pattern> + : I { + bits<3> AT; + bits<5> XAp; + bits<5> XBp; + bits<1> P; + + let Pattern = pattern; + + let Inst{6-8} = AT{2-0}; + let Inst{9-10} = 0; + let Inst{11-14} = XAp{3-0}; + let Inst{15} = P; + let Inst{16-19} = XBp{3-0}; + let Inst{20} = 0; + let Inst{21-28} = xo; + let Inst{29} = XAp{4}; + let Inst{30} = XBp{4}; + let Inst{31} = 0; +} + +class XX2Form_AT3_XBp5_P2 opcode, bits<9> xo, dag OOL, dag IOL, + string asmstr, list pattern> + : I { + bits<3> AT; + bits<5> XBp; + bits<2> P; + + let Pattern = pattern; + + let Inst{6-8} = AT{2-0}; + let Inst{9-14} = 0; + let Inst{15} = P{0}; + let Inst{16-19} = XBp{3-0}; + let Inst{20} = P{1}; + let Inst{21-29} = xo; + let Inst{30} = XBp{4}; + let Inst{31} = 0; +} + +class XForm_ATB3 opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL, + string asmstr, list pattern> + : I { + bits<3> AT; + bits<3> AB; + + let Pattern = pattern; + + let Inst{6-8} = AT{2-0}; + let Inst{9-10} = 0; + let Inst{11-15} = o; + let Inst{16-18} = AB{2-0}; + let Inst{19-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +let Predicates = [IsISAFuture] in { + def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226, + (outs vsrprc:$XAp, vsrprc:$XBp), + (ins wacc:$AT), + "dmxxextfdmr512 $AT, $XAp, $XBp, 0", []> { + let P = 0; + } + + def DMXXEXTFDMR512_HI : XX3Form_AT3_XABp5_P1<60, 226, + (outs vsrprc:$XAp, vsrprc:$XBp), + (ins wacc_hi:$AT), + "dmxxextfdmr512 $AT, $XAp, $XBp, 1", []> { + let P = 1; + } + + def DMXXINSTFDMR512 : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc:$AT), + (ins vsrprc:$XAp, vsrprc:$XBp), + "dmxxinstfdmr512 $AT, $XAp, $XBp, 0", []> { + let P = 0; + } + + def DMXXINSTFDMR512_HI : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc_hi:$AT), + (ins vsrprc:$XAp, vsrprc:$XBp), + "dmxxinstfdmr512 $AT, $XAp, $XBp, 1", []> { + let P = 1; + } + + def DMXXEXTFDMR256 : 
XX2Form_AT3_XBp5_P2<60, 484, (outs vsrprc:$XBp), + (ins dmrrowp:$AT, u2imm:$P), + "dmxxextfdmr256 $AT, $XBp, $P", []>; + + def DMXXINSTFDMR256 : XX2Form_AT3_XBp5_P2<60, 485, (outs dmrrowp:$AT), + (ins vsrprc:$XBp, u2imm:$P), + "dmxxinstfdmr256 $AT, $XBp, $P", []>; + + def DMMR : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB), + "dmmr $AT, $AB", []>; + + def DMXOR : XForm_ATB3<31, 7, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB), + "dmxor $AT, $AB", []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + + def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins), + "dmsetdmrz $AT", NoItinerary, []>; +} diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index ea4ef12bf21d2..8c8891b4b05cd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3278,6 +3278,8 @@ def : Pat<(not i1:$in), // Prefixed instructions may require access to the above defs at a later // time so we include this after the def. include "PPCInstrP10.td" +include "PPCInstrFutureMMA.td" +include "PPCInstrFuture.td" include "PPCInstrMMA.td" // Patterns for arithmetic i1 operations. 
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index aaa841fffa1b3..fea1a3afab1c2 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -184,7 +184,33 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { return RegName + 2; } return RegName + 1; - case 'c': if (RegName[1] == 'r') return RegName + 2; + case 'c': + if (RegName[1] == 'r') + return RegName + 2; + break; + case 'w': + // For wacc and wacc_hi + if (RegName[1] == 'a' && RegName[2] == 'c' && RegName[3] == 'c') { + if (RegName[4] == '_') + return RegName + 7; + else + return RegName + 4; + } + break; + case 'd': + // For dmr, dmrp, dmrrow, dmrrowp + if (RegName[1] == 'm' && RegName[2] == 'r') { + if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w' && + RegName[6] == 'p') + return RegName + 7; + else if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w') + return RegName + 6; + else if (RegName[3] == 'p') + return RegName + 4; + else + return RegName + 3; + } + break; } return RegName; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 49b26cd160608..32f8163a38828 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -446,6 +446,7 @@ def G8pRC : } include "PPCRegisterInfoMMA.td" +include "PPCRegisterInfoDMR.td" //===----------------------------------------------------------------------===// // PowerPC Operand Definitions. 
@@ -1013,3 +1014,48 @@ def acc : RegisterOperand { def uacc : RegisterOperand { let ParserMatchClass = PPCRegACCRCAsmOperand; } + +// DMR Register Operands +def PPCRegDMRROWRCAsmOperand : AsmOperandClass { + let Name = "RegDMRROWRC"; + let PredicateMethod = "isDMRROWRegNumber"; +} + +def dmrrow : RegisterOperand { + let ParserMatchClass = PPCRegDMRROWRCAsmOperand; +} + +def PPCRegDMRROWpRCAsmOperand : AsmOperandClass { + let Name = "RegDMRROWpRC"; + let PredicateMethod = "isDMRROWpRegNumber"; +} + +def dmrrowp : RegisterOperand { + let ParserMatchClass = PPCRegDMRROWpRCAsmOperand; +} + +def wacc : RegisterOperand { + let ParserMatchClass = PPCRegACCRCAsmOperand; +} + +def wacc_hi : RegisterOperand { + let ParserMatchClass = PPCRegACCRCAsmOperand; +} + +def PPCRegDMRRCAsmOperand : AsmOperandClass { + let Name = "RegDMRRC"; + let PredicateMethod = "isDMRRegNumber"; +} + +def dmr : RegisterOperand { + let ParserMatchClass = PPCRegDMRRCAsmOperand; +} + +def PPCRegDMRpRCAsmOperand : AsmOperandClass { + let Name = "RegDMRpRC"; + let PredicateMethod = "isDMRpRegNumber"; +} + +def dmrp : RegisterOperand { + let ParserMatchClass = PPCRegDMRpRCAsmOperand; +} diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td b/llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td new file mode 100644 index 0000000000000..1c3e7621825bd --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td @@ -0,0 +1,164 @@ +//===- PPCRegisterInfoDMR.td - The PowerPC Register File *- tablegen -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register info specific to Power PC Dense Math Registers(DMR). +// +// Register classes in this file are related to the Dense Math Registers (DMR). 
+// There are a total of 8 DMR registers numbered 0 to 7. +// Each DMR register has 4 different views: +// +// [ DMR0 ] +// | WACC0 | WACC_HI0 | +// | DMRROWp0 | DMRROWp1 | DMRROWp2 | DMRROWp3 | +// |DMRROW0|DMRROW1|DMRROW2|DMRROW3|DMRROW4|DMRROW5|DMRROW6|DMRROW7| +// [128bits|128bits|128bits|128bits|128bits|128bits|128bits|128bits] +// +// In addition to the above classes, two consecutive DMR registers make a +// DMR pair (DMRp) that is 2048 bits. +//===----------------------------------------------------------------------===// + +let Namespace = "PPC" in { +def sub_dmrrow0 : SubRegIndex<128>; +def sub_dmrrow1 : SubRegIndex<128, 128>; +def sub_dmrrowp0 : SubRegIndex<256>; +def sub_dmrrowp1 : SubRegIndex<256, 256>; +def sub_wacc_lo : SubRegIndex<512>; +def sub_wacc_hi : SubRegIndex<512, 512>; +def sub_dmr0 : SubRegIndex<1024>; +def sub_dmr1 : SubRegIndex<1024, 1024>; +} + +// A single row in a DMR register. +// There are 8 128 bit rows in each DMR register and 8 DMR registers so that +// makes 64 DMRROW registers in total. +class DMRROW num, string n> : PPCReg { + let HWEncoding{5-0} = num; +} + +// A consecutive pair of DMR row registers. +class DMRROWp num, string n, list subregs> : PPCReg { + let HWEncoding{4-0} = num; + let SubRegs = subregs; +} + +// WACC - Wide ACC registers. Accumulator registers that are subregs of DMR. +// These ACC registers no longer include VSR regs as subregs. +class WACC num, string n, list subregs> : PPCReg { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +// High bits for the ACC registers. +// When the ACC register is used these bits are ignored. +// When the ACC register is the target, these bits are set to zero.
+class WACC_HI num, string n, list subregs> : PPCReg { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +class DMR num, string n, list subregs> : PPCReg { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +class DMRp num, string n, list subregs> : PPCReg { + let HWEncoding{1-0} = num; + let SubRegs = subregs; +} + +// The DMR Row type registers are the lowest level of registers and have no +// subregs. +foreach Index = 0-63 in { + def DMRROW#Index : DMRROW, DwarfRegNum<[-1, -1]>; +} + +// DMRROW pairs are consecutive pairs. +// DMRROWp0 = DMRROW0, DMRROW1 +// DMRROWp1 = DMRROW2, DMRROW3 +// DMRROWp2 = DMRROW4, DMRROW5 +// etc... +let SubRegIndices = [sub_dmrrow0, sub_dmrrow1] in { + foreach Index = 0-31 in { + def DMRROWp#Index : DMRROWp("DMRROW"#!mul(Index, 2)), + !cast("DMRROW"#!add(!mul(Index, 2), 1))]>, DwarfRegNum<[-1, -1]>; + } +} + +let SubRegIndices = [sub_dmrrowp0, sub_dmrrowp1] in { + // WACC0 = DMRROWp0, DMRROWp1 + // WACC1 = DMRROWp4, DMRROWp5 + // WACC2 = DMRROWp8, DMRROWp9 + // etc... + foreach Index = 0-7 in { + def WACC#Index : WACC("DMRROWp"#!mul(Index, 4)), + !cast("DMRROWp"#!add(!mul(Index, 4), 1))]>, DwarfRegNum<[-1, -1]>; + } + + // WACC_HI0 = DMRROWp2, DMRROWp3 + // WACC_HI1 = DMRROWp6, DMRROWp7 + // WACC_HI2 = DMRROWp10, DMRROWp11 + // etc... + foreach Index = 0-7 in { + def WACC_HI#Index : WACC_HI("DMRROWp"#!add(!mul(Index, 4), 2)), + !cast("DMRROWp"#!add(!mul(Index, 4), 3))]>, DwarfRegNum<[-1, -1]>; + } +} + +// DMR0 = WACC0, WACC_HI0 +// DMR1 = WACC1, WACC_HI1 +// DMR2 = WACC2, WACC_HI2 +// etc... 
+let SubRegIndices = [sub_wacc_lo, sub_wacc_hi] in { + foreach Index = 0-7 in { + def DMR#Index : DMR("WACC"#Index), !cast("WACC_HI"#Index)]>, DwarfRegNum<[-1, -1]>; + } +} + +// DMRp0 = DMR0, DMR1 +// DMRp1 = DMR2, DMR3 +// DMRp2 = DMR4, DMR5 +// DMRp3 = DMR6, DMR7 +let SubRegIndices = [sub_dmr0, sub_dmr1] in { + def DMRp0 : DMRp<0, "dmrp0", [DMR0, DMR1]>, DwarfRegNum<[-1, -1]>; + def DMRp1 : DMRp<1, "dmrp1", [DMR2, DMR3]>, DwarfRegNum<[-1, -1]>; + def DMRp2 : DMRp<2, "dmrp2", [DMR4, DMR5]>, DwarfRegNum<[-1, -1]>; + def DMRp3 : DMRp<3, "dmrp3", [DMR6, DMR7]>, DwarfRegNum<[-1, -1]>; +} + +def DMRROWRC : RegisterClass<"PPC", [v128i1], 128, + (add (sequence "DMRROW%u", 0, 63))> { + let Size = 128; +} + +def DMRROWpRC : RegisterClass<"PPC", [v256i1], 128, + (add (sequence "DMRROWp%u", 0, 31))> { + let Size = 256; +} + +def WACCRC : RegisterClass<"PPC", [v512i1], 128, + (add (sequence "WACC%u", 0, 7))> { + let Size = 512; +} + +def WACC_HIRC : RegisterClass<"PPC", [v512i1], 128, + (add (sequence "WACC_HI%u", 0, 7))> { + let Size = 512; +} + +def DMRRC : RegisterClass<"PPC", [v1024i1], 128, + (add (sequence "DMR%u", 0, 7))> { + let Size = 1024; +} + +def DMRpRC : RegisterClass<"PPC", [v2048i1], 128, + (add DMRp0, DMRp1, DMRp2, DMRp3)> { + let Size = 2048; +} diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index d350111717159..e9f4daa62de3c 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -38,10 +38,11 @@ def P9Model : SchedMachineModel { let CompleteModel = 1; - // Do not support SPE (Signal Processing Engine), prefixed instructions on - // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions - // introduced in ISA 3.1. 
- let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA, + // Do not support SPE (Signal Processing Engine) or prefixed instructions on + // Power 9, or MMA, or paired vector mem ops, or PC relative mem ops, or + // instructions introduced after ISA 3.0. + let UnsupportedFeatures = [HasSPE, PrefixInstrs, MMA, + PairedVectorMemops, PCRelativeMemops, IsISA3_1, IsISAFuture]; } diff --git a/llvm/test/CodeGen/PowerPC/future-check-features.ll b/llvm/test/CodeGen/PowerPC/future-check-features.ll index 1dca4dafd5b64..f881119335d09 100644 --- a/llvm/test/CodeGen/PowerPC/future-check-features.ll +++ b/llvm/test/CodeGen/PowerPC/future-check-features.ll @@ -1,9 +1,19 @@ -; RUN: llc -mattr=pcrelative-memops,prefix-instrs,paired-vector-memops,mma,rop-protect,privileged \ +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ ; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s -; RUN: llc -mattr=pcrelative-memops,prefix-instrs,paired-vector-memops,mma,rop-protect,privileged \ +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ ; RUN: -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ +; RUN: -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ +; RUN: -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s define dso_local signext i32 @f() { entry: 
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt new file mode 100644 index 0000000000000..87059b9e3c16c --- /dev/null +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -0,0 +1,41 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu \ +# RUN: -mcpu=future | FileCheck %s + +# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-aix-gnu \ +# RUN: -mcpu=future | FileCheck %s + +# RUN: llvm-mc --disassemble %s -triple powerpc-unknown-aix-gnu \ +# RUN: -mcpu=future | FileCheck %s + +#CHECK: dmxxextfdmr512 1, 2, 34, 0 +0xf0 0x82 0x17 0x12 + +#CHECK: dmxxextfdmr512 1, 2, 34, 1 +0xf0 0x83 0x17 0x12 + +#CHECK: dmxxextfdmr256 3, 8, 0 +0xf1 0x80 0x47 0x90 + +#CHECK: dmxxextfdmr256 3, 8, 3 +0xf1 0x81 0x4f 0x90 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 0 +0xf0 0x82 0x17 0x52 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 1 +0xf0 0x83 0x17 0x52 + +#CHECK: dmxxinstfdmr256 3, 8, 0 +0xf1 0x80 0x47 0x94 + +#CHECK: dmxxinstfdmr256 3, 8, 3 +0xf1 0x81 0x4f 0x94 + +#CHECK: dmsetdmrz 3 +0x7d 0x82 0x01 0x62 + +#CHECK: dmmr 4, 5 +0x7e 0x06 0xa1 0x62 + +#CHECK: dmxor 6, 7 +0x7f 0x07 0xe1 0x62 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt new file mode 100644 index 0000000000000..32dfcb28bf57c --- /dev/null +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -0,0 +1,35 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64le-unknown-unknown \ +# RUN: -mcpu=future | FileCheck %s + +#CHECK: dmxxextfdmr512 1, 2, 34, 0 +0x12 0x17 0x82 0xf0 + +#CHECK: dmxxextfdmr512 1, 2, 34, 1 +0x12 0x17 0x83 0xf0 + +#CHECK: dmxxextfdmr256 3, 8, 0 +0x90 0x47 0x80 0xf1 + +#CHECK: dmxxextfdmr256 3, 8, 3 +0x90 0x4f 0x81 0xf1 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 0 +0x52 0x17 0x82 0xf0 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 1 +0x52 0x17 0x83 0xf0 + +#CHECK: dmxxinstfdmr256 3, 8, 0 +0x94 
0x47 0x80 0xf1 + +#CHECK: dmxxinstfdmr256 3, 8, 3 +0x94 0x4f 0x81 0xf1 + +#CHECK: dmsetdmrz 3 +0x62 0x01 0x82 0x7d + +#CHECK: dmmr 4, 5 +0x62 0xa1 0x06 0x7e + +#CHECK: dmxor 6, 7 +0x62 0xe1 0x07 0x7f diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s new file mode 100644 index 0000000000000..2f7986f221c97 --- /dev/null +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -0,0 +1,50 @@ +# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-LE %s +# RUN: llvm-mc -triple powerpc-unknown-aix-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-BE %s + +# CHECK-BE: dmxxextfdmr512 1, 2, 34, 0 # encoding: [0xf0,0x82,0x17,0x12] +# CHECK-LE: dmxxextfdmr512 1, 2, 34, 0 # encoding: [0x12,0x17,0x82,0xf0] + dmxxextfdmr512 1, 2, 34, 0 + +# CHECK-BE: dmxxextfdmr512 1, 2, 34, 1 # encoding: [0xf0,0x83,0x17,0x12] +# CHECK-LE: dmxxextfdmr512 1, 2, 34, 1 # encoding: [0x12,0x17,0x83,0xf0] + dmxxextfdmr512 1, 2, 34, 1 + +# CHECK-BE: dmxxextfdmr256 3, 8, 0 # encoding: [0xf1,0x80,0x47,0x90] +# CHECK-LE: dmxxextfdmr256 3, 8, 0 # encoding: [0x90,0x47,0x80,0xf1] + dmxxextfdmr256 3, 8, 0 + +# CHECK-BE: dmxxextfdmr256 3, 8, 3 # encoding: [0xf1,0x81,0x4f,0x90] +# CHECK-LE: dmxxextfdmr256 3, 8, 3 # encoding: [0x90,0x4f,0x81,0xf1] + dmxxextfdmr256 3, 8, 3 + +# CHECK-BE: dmxxinstfdmr512 1, 2, 34, 0 # encoding: [0xf0,0x82,0x17,0x52] +# CHECK-LE: dmxxinstfdmr512 1, 2, 34, 0 # encoding: [0x52,0x17,0x82,0xf0] + dmxxinstfdmr512 1, 2, 34, 0 + +# CHECK-BE: dmxxinstfdmr512 1, 2, 34, 1 # encoding: [0xf0,0x83,0x17,0x52] +# CHECK-LE: dmxxinstfdmr512 1, 2, 34, 1 # encoding: [0x52,0x17,0x83,0xf0] + dmxxinstfdmr512 1, 2, 34, 1 + +# CHECK-BE: dmxxinstfdmr256 3, 8, 0 # encoding: [0xf1,0x80,0x47,0x94] +# CHECK-LE: dmxxinstfdmr256 3, 8, 0 # encoding: 
[0x94,0x47,0x80,0xf1] + dmxxinstfdmr256 3, 8, 0 + +# CHECK-BE: dmxxinstfdmr256 3, 8, 3 # encoding: [0xf1,0x81,0x4f,0x94] +# CHECK-LE: dmxxinstfdmr256 3, 8, 3 # encoding: [0x94,0x4f,0x81,0xf1] + dmxxinstfdmr256 3, 8, 3 + +# CHECK-BE: dmsetdmrz 3 # encoding: [0x7d,0x82,0x01,0x62] +# CHECK-LE: dmsetdmrz 3 # encoding: [0x62,0x01,0x82,0x7d] + dmsetdmrz 3 + +# CHECK-BE: dmmr 4, 5 # encoding: [0x7e,0x06,0xa1,0x62] +# CHECK-LE: dmmr 4, 5 # encoding: [0x62,0xa1,0x06,0x7e] + dmmr 4, 5 + +# CHECK-BE: dmxor 6, 7 # encoding: [0x7f,0x07,0xe1,0x62] +# CHECK-LE: dmxor 6, 7 # encoding: [0x62,0xe1,0x07,0x7f] + dmxor 6, 7 diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 8384a8ceab625..fba168f6e6981 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -91,6 +91,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v256i1: return "MVT::v256i1"; case MVT::v512i1: return "MVT::v512i1"; case MVT::v1024i1: return "MVT::v1024i1"; + case MVT::v2048i1: return "MVT::v2048i1"; case MVT::v128i2: return "MVT::v128i2"; case MVT::v256i2: return "MVT::v256i2"; case MVT::v64i4: return "MVT::v64i4"; From c188910694ab821aabc0ca11f4636b69f5f7b4f1 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Wed, 2 Nov 2022 10:18:57 -0700 Subject: [PATCH 140/516] [lldb][Test] Make TestFrameFormatNameWithArgs.test more compatible across platforms On Linux the `std::function` behaved differently to that on Darwin. This patch removes usage of `std::function` in the test but attempts to retain the test-coverage. We mainly want function types appearing in the template argument and function argument lists. Also add a `char const*` overload to one of the test functions to cover the "format function argument using ValueObject formatter" code-path. 
Differential Revision: https://reviews.llvm.org/D137272 --- lldb/test/Shell/Settings/Inputs/names.cpp | 23 +++++++++---------- .../Settings/TestFrameFormatNameWithArgs.test | 16 ++++++------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/lldb/test/Shell/Settings/Inputs/names.cpp b/lldb/test/Shell/Settings/Inputs/names.cpp index 461c6d091a0f4..cf6982abb8f35 100644 --- a/lldb/test/Shell/Settings/Inputs/names.cpp +++ b/lldb/test/Shell/Settings/Inputs/names.cpp @@ -1,5 +1,3 @@ -#include - namespace detail { template struct Quux {}; } // namespace detail @@ -7,15 +5,16 @@ template struct Quux {}; using FuncPtr = detail::Quux (*(*)(int))(float); struct Foo { - template void foo(T const &t) const noexcept(true) {} + template void foo(T arg) const noexcept(true) {} - template void operator<<(size_t) {} + template void operator<<(int) {} template FuncPtr returns_func_ptr(detail::Quux &&) const noexcept(false) { return nullptr; } }; namespace ns { -template int foo(T const &t) noexcept(false) { return 0; } +template int foo(char const *str) noexcept(false) { return 0; } +template int foo(T t) { return 1; } template FuncPtr returns_func_ptr(detail::Quux &&) { return nullptr; } } // namespace ns @@ -24,20 +23,20 @@ int bar() { return 1; } namespace { int anon_bar() { return 1; } -auto anon_lambda = [](std::function) mutable {}; +auto anon_lambda = [] {}; } // namespace int main() { - ns::foo(bar); - ns::foo(std::function{bar}); + ns::foo(bar); + ns::foo("bar"); ns::foo(anon_lambda); - ns::foo(std::function{anon_bar}); - ns::foo(&Foo::foo>); + ns::foo(anon_bar); + ns::foo)>("method"); ns::returns_func_ptr(detail::Quux{}); Foo f; - f.foo(std::function{bar}); - f.foo(std::function{anon_bar}); + f.foo(anon_bar); f.operator<< <(2 > 1)>(0); f.returns_func_ptr(detail::Quux{}); + return 0; } diff --git a/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test b/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test index d990114f57845..dc4dedadee80a 100644 --- 
a/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test +++ b/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test @@ -1,4 +1,4 @@ -# REQUIRES: system-darwin +# UNSUPPORTED: system-windows # RUN: %clangxx_host -g -O0 %S/Inputs/names.cpp -std=c++17 -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s settings set -f frame-format "frame ${function.name-with-args}\n" @@ -8,21 +8,19 @@ break set -n returns_func_ptr run # CHECK: frame int ns::foo(t={{.*}}) c -# CHECK: frame int ns::foo>(t= Function = bar() ) +# CHECK: frame int ns::foo(str="bar") c -# CHECK: frame int ns::foo<(anonymous namespace)::$_0>(t={{.*}}) +# CHECK: frame int ns::foo<(anonymous namespace)::$_0>(t=(anonymous namespace)::(unnamed class) @ {{.*}}) c -# CHECK: frame int ns::foo>(t= Function = (anonymous namespace)::anon_bar() ) +# CHECK: frame int ns::foo(t=({{.*}}`(anonymous namespace)::anon_bar() at {{.*}})) c -# CHECK: frame int ns::foo const&) const noexcept>(t={{.*}}) +# CHECK: frame int ns::foo(str="method") c # CHECK: frame ns::returns_func_ptr((null)={{.*}}) c -# CHECK: frame void Foo::foo>(this={{.*}}, t= Function = bar() ) const +# CHECK: frame void Foo::foo(this={{.*}}, arg=({{.*}}`(anonymous namespace)::anon_bar() at {{.*}})) c -# CHECK: frame void Foo::foo>(this={{.*}}, t= Function = (anonymous namespace)::anon_bar() ) const -c -# CHECK: frame void Foo::operator<<<1ul>(this={{.*}}, (null)=0) +# CHECK: frame void Foo::operator<<<1>(this={{.*}}, (null)=0) c # CHECK: frame Foo::returns_func_ptr(this={{.*}}, (null)={{.*}}) q From fac26edae504555e383032fafd7921cb9d9a2022 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Thu, 3 Nov 2022 13:46:00 +0000 Subject: [PATCH 141/516] Revert "[Assignment Tracking][3/*] Add DIAssignID metadata boilerplate" This reverts commit c285df77e9b78f971f9cd9d025248c20b030cc2a. 
A sanitizer bot found an issue: https://lab.llvm.org/buildbot/#/builders/5/builds/28809/steps/13/logs/stdio --- llvm/include/llvm-c/DebugInfo.h | 3 +- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 3 +- llvm/include/llvm/IR/DebugInfoMetadata.h | 36 ------------------- llvm/include/llvm/IR/FixedMetadataKinds.def | 1 - llvm/include/llvm/IR/Metadata.def | 1 - llvm/lib/AsmParser/LLParser.cpp | 18 ---------- llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 13 ------- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 11 ------ llvm/lib/IR/AsmWriter.cpp | 6 ---- llvm/lib/IR/DebugInfo.cpp | 8 ++--- llvm/lib/IR/DebugInfoMetadata.cpp | 7 ---- llvm/lib/IR/Verifier.cpp | 17 --------- .../parse-and-verify/distinct.ll | 9 ----- .../parse-and-verify/instruction-type.ll | 36 ------------------- .../parse-and-verify/operands.ll | 9 ----- .../parse-and-verify/roundtrip.ll | 33 ----------------- 16 files changed, 4 insertions(+), 207 deletions(-) delete mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll delete mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll delete mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll delete mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index ef6a147eb2a52..8554a01998736 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -169,8 +169,7 @@ enum { LLVMDICommonBlockMetadataKind, LLVMDIStringTypeMetadataKind, LLVMDIGenericSubrangeMetadataKind, - LLVMDIArgListMetadataKind, - LLVMDIAssignIDMetadataKind, + LLVMDIArgListMetadataKind }; typedef unsigned LLVMMetadataKind; diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 74a51d5ce6907..ee5669c6c6aa8 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h 
@@ -349,8 +349,7 @@ enum MetadataCodes { // info. METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...] METADATA_GENERIC_SUBRANGE = 45, // [distinct, count, lo, up, stride] - METADATA_ARG_LIST = 46, // [n x [type num, value num]] - METADATA_ASSIGN_ID = 47, // [distinct, ...] + METADATA_ARG_LIST = 46 // [n x [type num, value num]] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index f57691f6f9fc6..5b20bf3ade99a 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -215,7 +215,6 @@ class DINode : public MDNode { case DIImportedEntityKind: case DIModuleKind: case DIGenericSubrangeKind: - case DIAssignIDKind: return true; } } @@ -296,41 +295,6 @@ class GenericDINode : public DINode { } }; -/// Assignment ID. -/// Used to link stores (as an attachment) and dbg.assigns (as an operand). -/// DIAssignID metadata is never uniqued as we compare instances using -/// referential equality (the instance/address is the ID). -class DIAssignID : public MDNode { - friend class LLVMContextImpl; - friend class MDNode; - - DIAssignID(LLVMContext &C, StorageType Storage) - : MDNode(C, DIAssignIDKind, Storage, None) {} - - ~DIAssignID() { dropAllReferences(); } - - static DIAssignID *getImpl(LLVMContext &Context, StorageType Storage, - bool ShouldCreate = true); - - TempDIAssignID cloneImpl() const { return getTemporary(getContext()); } - -public: - // This node has no operands to replace. - void replaceOperandWith(unsigned I, Metadata *New) = delete; - - static DIAssignID *getDistinct(LLVMContext &Context) { - return getImpl(Context, Distinct); - } - static TempDIAssignID getTemporary(LLVMContext &Context) { - return TempDIAssignID(getImpl(Context, Temporary)); - } - // NOTE: Do not define get(LLVMContext&) - see class comment. 
- - static bool classof(const Metadata *MD) { - return MD->getMetadataID() == DIAssignIDKind; - } -}; - /// Array subrange. /// /// TODO: Merge into node for DW_TAG_array_type, which should have a custom diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 8723bf2a0680c..3d986325c5d33 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -49,4 +49,3 @@ LLVM_FIXED_MD_KIND(MD_memprof, "memprof", 34) LLVM_FIXED_MD_KIND(MD_callsite, "callsite", 35) LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36) LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) -LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index 36c34c1d2347c..bbf349e6b508c 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -110,7 +110,6 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILabel) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIAssignID) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 43e47aa33c863..0fda0559b5b41 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4646,24 +4646,6 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) { return false; } -/// parseDIAssignID: -/// ::= distinct !DIAssignID() -bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) { - if (!IsDistinct) - return Lex.Error("missing 'distinct', required for !DIAssignID()"); - - Lex.Lex(); - - // Now eat the parens. 
- if (parseToken(lltok::lparen, "expected '(' here")) - return true; - if (parseToken(lltok::rparen, "expected ')' here")) - return true; - - Result = DIAssignID::getDistinct(Context); - return false; -} - /// parseGenericDINode: /// ::= !GenericDINode(tag: 15, header: "...", operands: {...}) bool LLParser::parseGenericDINode(MDNode *&Result, bool IsDistinct) { diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 1ac1502e8aefb..02d76f61695af 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -856,7 +856,6 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { case bitc::METADATA_TEMPLATE_VALUE: case bitc::METADATA_GLOBAL_VAR: case bitc::METADATA_LOCAL_VAR: - case bitc::METADATA_ASSIGN_ID: case bitc::METADATA_LABEL: case bitc::METADATA_EXPRESSION: case bitc::METADATA_OBJC_PROPERTY: @@ -1965,18 +1964,6 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } - case bitc::METADATA_ASSIGN_ID: { - if (Record.size() != 1) - return error("Invalid DIAssignID record."); - - IsDistinct = Record[0] & 1; - if (!IsDistinct) - return error("Invalid DIAssignID record. Must be distinct"); - - MetadataList.assignValue(DIAssignID::getDistinct(Context), NextMetadataNo); - NextMetadataNo++; - break; - } case bitc::METADATA_LOCAL_VAR: { // 10th field is for the obseleted 'inlinedAt:' field. 
if (Record.size() < 8 || Record.size() > 10) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index bed3ebad9874e..1ac4413f158eb 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -340,8 +340,6 @@ class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { unsigned Abbrev); void writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev); - void writeDIAssignID(const DIAssignID *N, SmallVectorImpl &Record, - unsigned Abbrev); void writeDITemplateTypeParameter(const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev); @@ -1957,15 +1955,6 @@ void ModuleBitcodeWriter::writeDIModule(const DIModule *N, Record.clear(); } -void ModuleBitcodeWriter::writeDIAssignID(const DIAssignID *N, - SmallVectorImpl &Record, - unsigned Abbrev) { - // There are no arguments for this metadata type. - Record.push_back(N->isDistinct()); - Stream.EmitRecord(bitc::METADATA_ASSIGN_ID, Record, Abbrev); - Record.clear(); -} - void ModuleBitcodeWriter::writeDITemplateTypeParameter( const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev) { diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index d49b8710bc9a4..21e662bed6b25 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1865,12 +1865,6 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL, Out << ")"; } -static void writeDIAssignID(raw_ostream &Out, const DIAssignID *DL, - AsmWriterContext &WriterCtx) { - Out << "!DIAssignID()"; - MDFieldPrinter Printer(Out, WriterCtx); -} - static void writeDISubrange(raw_ostream &Out, const DISubrange *N, AsmWriterContext &WriterCtx) { Out << "!DISubrange("; diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index d30fca63067c0..8f6d58cb90b90 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -469,13 +469,9 @@ bool llvm::stripDebugInfo(Function &F) { if 
(NewLoopID != LoopID) I.setMetadata(LLVMContext::MD_loop, NewLoopID); } - // Strip other attachments that are or use debug info. - if (I.hasMetadataOtherThanDebugLoc()) { - // Heapallocsites point into the DIType system. + // Strip heapallocsite attachments, they point into the DIType system. + if (I.hasMetadataOtherThanDebugLoc()) I.setMetadata("heapallocsite", nullptr); - // DIAssignID are debug info metadata primitives. - I.setMetadata(LLVMContext::MD_DIAssignID, nullptr); - } } } return Changed; diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 2359e56c08684..9b4f92a63c5e2 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1253,13 +1253,6 @@ bool DIExpression::startsWithDeref() const { return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref; } -DIAssignID *DIAssignID::getImpl(LLVMContext &Context, StorageType Storage, - bool ShouldCreate) { - // Uniqued DIAssignID are not supported as the instance address *is* the ID. 
- assert(Storage != StorageType::Uniqued && "uniqued DIAssignID unsupported"); - return new (0u, Storage) DIAssignID(Context, Storage); -} - unsigned DIExpression::ExprOperand::getSize() const { uint64_t Op = getOp(); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index c9b2d6aa5abf2..0614f206981a1 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -471,7 +471,6 @@ class Verifier : public InstVisitor, VerifierSupport { void visitCallStackMetadata(MDNode *MD); void visitMemProfMetadata(Instruction &I, MDNode *MD); void visitCallsiteMetadata(Instruction &I, MDNode *MD); - void visitDIAssignIDMetadata(Instruction &I, MDNode *MD); void visitAnnotationMetadata(MDNode *Annotation); void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); @@ -1484,11 +1483,6 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) { CheckDI(!isa(Ty), "invalid type", &N, N.getType()); } -void Verifier::visitDIAssignID(const DIAssignID &N) { - CheckDI(!N.getNumOperands(), "DIAssignID has no arguments", &N); - CheckDI(N.isDistinct(), "DIAssignID must be distinct", &N); -} - void Verifier::visitDILabel(const DILabel &N) { if (auto *S = N.getRawScope()) CheckDI(isa(S), "invalid scope", &N, S); @@ -4555,14 +4549,6 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { } } -void Verifier::visitDIAssignIDMetadata(Instruction &I, MDNode *MD) { - assert(I.hasMetadata(LLVMContext::MD_DIAssignID)); - bool ExpectedInstTy = - isa(I) || isa(I) || isa(I); - CheckDI(ExpectedInstTy, "!DIAssignID attached to unexpected instruction kind", - I, MD); -} - void Verifier::visitCallStackMetadata(MDNode *MD) { // Call stack metadata should consist of a list of at least 1 constant int // (representing a hash of the location). 
@@ -4864,9 +4850,6 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite)) visitCallsiteMetadata(I, MD); - if (MDNode *MD = I.getMetadata(LLVMContext::MD_DIAssignID)) - visitDIAssignIDMetadata(I, MD); - if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll deleted file mode 100644 index 2cc5452fe7d2a..0000000000000 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll +++ /dev/null @@ -1,9 +0,0 @@ -; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ -; RUN: | FileCheck %s - -;; Check that badly formed assignment tracking metadata is caught either -;; while parsing or by the verifier. - -; CHECK: error: missing 'distinct', required for !DIAssignID() - -!1 = !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll deleted file mode 100644 index d0f447ee200b6..0000000000000 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: opt -S %s -verify -experimental-assignment-tracking 2>&1 \ -; RUN: | FileCheck %s - -;; NOTE: Expect opt to return zero because the badly formed debug info -;; is going to be stripped. - -;; Check that badly formed assignment tracking metadata is caught either -;; while parsing or by the verifier. - -;; Check verifier output. -; CHECK: !DIAssignID attached to unexpected instruction kind - -;; Check DIAssignID is stripped from IR. 
-; CHECK: define dso_local void @fun() { -; CHECK-NOT: DIAssignID - -define dso_local void @fun() !dbg !7 { -entry: - ret void, !DIAssignID !14 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "test.c", directory: "/") -!2 = !{} -!3 = !{i32 7, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 14.0.0"} -!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{null} -!14 = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll deleted file mode 100644 index 79adcb9ce2d12..0000000000000 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll +++ /dev/null @@ -1,9 +0,0 @@ -; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ -; RUN: | FileCheck %s - -;; Check that badly formed assignment tracking metadata is caught either -;; while parsing or by the verifier. 
- -; CHECK: error: expected ')' here - -!1 = distinct !DIAssignID(0) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll deleted file mode 100644 index 1ddb95b79b0f0..0000000000000 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: opt %s -verify -experimental-assignment-tracking \ -; RUN: | opt -verify -S -experimental-assignment-tracking \ -; RUN: | FileCheck %s - -;; Roundtrip test (text -> bitcode -> text) for DIAssignID attachments. - -; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID:[0-9]+]] -; CHECK-DAG: ![[ID]] = distinct !DIAssignID() - -define dso_local void @fun() !dbg !7 { -entry: - %local = alloca i32, align 4, !DIAssignID !14 - ret void, !dbg !13 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "test.c", directory: "/") -!2 = !{} -!3 = !{i32 7, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 14.0.0"} -!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{null} -!10 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 2, type: !11) -!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!13 = !DILocation(line: 3, column: 1, scope: !7) -!14 = distinct !DIAssignID() From dc9854d4e91e5ede86e8ebb9bfea244cf494e8c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 3 Nov 2022 15:02:11 +0100 Subject: [PATCH 142/516] 
[JITLink] Fix check-line in Windows X86 COFF COMDAT test This test caught my attention because it's the only one in JITLink that XFAILs. Running it in isolation showed that the output doesn't meet the CHECK-LINES, i.e. the block address didn't match: ``` error: CHECK-NEXT: expected string not found in input CHECK-NEXT: block 0xfff02000 size = 0x00000001, align = 16, alignment-offset = 0 :22:2: note: possible intended match here block 0xfff01000 size = 0x00000001, align = 16, alignment-offset = 0 ``` Though, that doesn't appear to be the reason the test XFAILs. What we really want to check here is that llvm-jitlink doesn't fail with a duplicate section error yet. In order to avoid issues like this in the future we can match a placeholder to check for some valid address within the slab (64Kb == last 4 digits). The patch also drops the duplicate -noexec argument, removes an empty RUN-line, fixes indentation and adds a newline at EOF. Reviewed By: sunho Differential Revision: https://reviews.llvm.org/D137148 --- .../X86/COFF_comdat_weak_plus_strong.s | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s b/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s index 30e0193b11a65..2754855e428e0 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s @@ -1,22 +1,23 @@ # FIXME: Comdat any + ordinary strong symbol should generate duplicate section error # XFAIL: * +# # RUN: rm -rf %t && mkdir -p %t # RUN: yaml2obj %S/Inputs/COFF_comdat_weak_def.yaml -o %t/COFF_weak_1.o # RUN: yaml2obj %S/Inputs/COFF_strong_def.yaml -o %t/COFF_strong.o # RUN: llvm-mc -filetype=obj -triple=x86_64-windows-msvc %s -o %t/COFF_main.o -# RUN: +# # RUN: not llvm-jitlink -noexec %t/COFF_main.o %t/COFF_weak_1.o %t/COFF_strong.o \ -# RUN: -slab-allocate 100Kb -slab-address 0xfff00000 
-slab-page-size 4096 \ -# RUN: -show-graph -noexec 2>&1 | FileCheck %s +# RUN: -slab-allocate 64Kb -slab-address 0xfff00000 \ +# RUN: -slab-page-size 4096 -show-graph 2>&1 | FileCheck %s # -# Check that a combination of comdat any definition and strong definition generate -# duplicate definition error. +# Check that a combination of comdat any definition and strong definition +# generate duplicate definition error. # # CHECK: section strongfunc: # CHECK-EMPTY: -# CHECK-NEXT: block 0xfff02000 size = 0x00000001, align = 16, alignment-offset = 0 +# CHECK-NEXT: block 0xfff0[[LO:[0-9a-f]+]] size = 0x00000001, align = 16, alignment-offset = 0 # CHECK-NEXT: symbols: -# CHECK-NEXT: 0xfff02000 (block + 0x00000000): size: 0x00000001, linkage: strong, scope: default, live - func +# CHECK-NEXT: 0xfff0[[LO]] (block + 0x00000000): size: 0x00000001, linkage: strong, scope: default, live - func # CHECK-NEXT: no edges .text @@ -28,5 +29,5 @@ .globl main .p2align 4, 0x90 main: - callq func - retq \ No newline at end of file + callq func + retq From b03f7c3365b1c6ae692b06685a6266d359bfa2d3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 13:16:24 +0100 Subject: [PATCH 143/516] [SimplifyCFG] Use range based for loop (NFC) --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index bf0eca555014d..80854e8ffbd26 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2885,13 +2885,10 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, unsigned SpeculatedInstructions = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; - for (BasicBlock::iterator BBI = ThenBB->begin(), - BBE = std::prev(ThenBB->end()); - BBI != BBE; ++BBI) { - Instruction *I = &*BBI; + for (Instruction &I : drop_end(*ThenBB)) { // 
Skip debug info. if (isa(I)) { - SpeculatedDbgIntrinsics.push_back(I); + SpeculatedDbgIntrinsics.push_back(&I); continue; } @@ -2903,7 +2900,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // the samples collected on the non-conditional path are counted towards // the conditional path. We leave it for the counts inference algorithm to // figure out a proper count for an unknown probe. - SpeculatedDbgIntrinsics.push_back(I); + SpeculatedDbgIntrinsics.push_back(&I); continue; } @@ -2914,23 +2911,23 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I) && + if (!isSafeToSpeculativelyExecute(&I) && !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( - I, BB, ThenBB, EndBB)))) + &I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - computeSpeculationCost(I, TTI) > + computeSpeculationCost(&I, TTI) > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. if (SpeculatedStoreValue) - SpeculatedStore = cast(I); + SpeculatedStore = cast(&I); // Do not hoist the instruction if any of its operands are defined but not // used in BB. The transformation will prevent the operand from // being sunk into the use block. - for (Use &Op : I->operands()) { + for (Use &Op : I.operands()) { Instruction *OpI = dyn_cast(Op); if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) continue; // Not a candidate for sinking. 
From 37f80101a9120c95eb3c1022b8cc4a390f561700 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 3 Nov 2022 10:14:30 -0400 Subject: [PATCH 144/516] Silence a "not all control paths return" MSVC warning; NFC --- clang/lib/AST/Interp/Descriptor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index 55182ec383fa1..f645063acdd01 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -184,6 +184,7 @@ static BlockCtorFn getCtorArrayPrim(PrimType Type) { static BlockDtorFn getDtorArrayPrim(PrimType Type) { TYPE_SWITCH(Type, return dtorArrayTy); + llvm_unreachable("unknown Expr"); } static BlockMoveFn getMoveArrayPrim(PrimType Type) { From 9ea2b150b5455b907ba3b9aa24703b5d4faabedd Mon Sep 17 00:00:00 2001 From: Jan Sjodin Date: Wed, 2 Nov 2022 10:18:48 -0400 Subject: [PATCH 145/516] [OpenMP][OMPIRBuilder] Migrate createOffloadEntriesAndInfoMetadata from clang to OpenMPIRBuilder This patch moves the createOffloadEntriesAndInfoMetadata to OpenMPIRBuilder, the createOffloadEntry helper function. The clang specific error handling is invoked using a callback. This code will also be used by flang in the future. 
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 221 ++++-------------- clang/lib/CodeGen/CGOpenMPRuntime.h | 9 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 27 --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h | 8 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 32 +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 175 ++++++++++++++ 6 files changed, 254 insertions(+), 218 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9a2fc93ce40c6..6b0908d139f47 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2950,194 +2950,57 @@ enum KmpTaskTFields { }; } // anonymous namespace -void CGOpenMPRuntime::createOffloadEntry( - llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage) { - OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags); -} - void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { - // Emit the offloading entries and metadata so that the device codegen side - // can easily figure out what to emit. The produced metadata looks like - // this: - // - // !omp_offload.info = !{!1, ...} - // - // Right now we only generate metadata for function that contain target - // regions. - // If we are in simd mode or there are no entries, we don't need to do // anything. if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) return; - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &C = M.getContext(); - SmallVector< - std::tuple, - 16> - OrderedEntries(OffloadEntriesInfoManager.size()); - llvm::SmallVector ParentFunctions( - OffloadEntriesInfoManager.size()); - - // Auxiliary methods to create metadata values and strings. - auto &&GetMDInt = [this](unsigned V) { - return llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(CGM.Int32Ty, V)); - }; - - auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; - - // Create the offloading info metadata node. 
- llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - - // Create function that emits metadata for each target region entry; - auto &&TargetRegionMetadataEmitter = - [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, - &GetMDString]( - const llvm::TargetRegionEntryInfo &EntryInfo, - const llvm::OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion - &E) { - // Generate metadata for target regions. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (0). - // - Entry 1 -> Device ID of the file where the entry was identified. - // - Entry 2 -> File ID of the file where the entry was identified. - // - Entry 3 -> Mangled name of the function where the entry was - // identified. - // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Order the entry was created. - // The first element of the metadata node is the kind. - llvm::Metadata *Ops[] = { - GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID), - GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName), - GetMDInt(EntryInfo.Line), GetMDInt(E.getOrder())}; - - SourceLocation Loc; - for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), - E = CGM.getContext().getSourceManager().fileinfo_end(); - I != E; ++I) { - if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && - I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { - Loc = CGM.getContext().getSourceManager().translateFileLineCol( - I->getFirst(), EntryInfo.Line, 1); - break; - } - } - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = - std::make_tuple(&E, Loc, StringRef(EntryInfo.ParentName)); - ParentFunctions[E.getOrder()] = StringRef(EntryInfo.ParentName); - - // Add metadata to the named metadata node. 
- MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( - TargetRegionMetadataEmitter); - - // Create function that emits metadata for each device global variable entry; - auto &&DeviceGlobalVarMetadataEmitter = - [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD]( - StringRef MangledName, - const llvm::OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar - &E) { - // Generate metadata for global variables. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (1). - // - Entry 1 -> Mangled name of the variable. - // - Entry 2 -> Declare target kind. - // - Entry 3 -> Order the entry was created. - // The first element of the metadata node is the kind. - llvm::Metadata *Ops[] = { - GetMDInt(E.getKind()), GetMDString(MangledName), - GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; - - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = - std::make_tuple(&E, SourceLocation(), MangledName); - - // Add metadata to the named metadata node. - MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( - DeviceGlobalVarMetadataEmitter); - - for (const auto &E : OrderedEntries) { - assert(std::get<0>(E) && "All ordered entries must exist!"); - if (const auto *CE = dyn_cast< - llvm::OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>( - std::get<0>(E))) { - if (!CE->getID() || !CE->getAddress()) { - // Do not blame the entry if the parent funtion is not emitted. 
- StringRef FnName = ParentFunctions[CE->getOrder()]; - if (!CGM.GetGlobalValue(FnName)) - continue; - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for target region in %0 is incorrect: either the " - "address or the ID is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; - continue; - } - createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, - CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = dyn_cast( - std::get<0>(E))) { - llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags = - static_cast< - llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>( - CE->getFlags()); - switch (Flags) { - case llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo: { - if (CGM.getLangOpts().OpenMPIsDevice && - CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, "Offloading entry for declare target " - "variable %0 is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); - continue; - } - // The vaiable has no definition - no need to add the entry. 
- if (CE->getVarSize() == 0) - continue; - break; - } - case llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink: - assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || - (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && - "Declaret target link address is set."); - if (CGM.getLangOpts().OpenMPIsDevice) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); - continue; + llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = + [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, + const llvm::TargetRegionEntryInfo &EntryInfo) -> void { + SourceLocation Loc; + if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && + I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), EntryInfo.Line, 1); + break; } - break; } - - // Hidden or internal symbols on the device are not externally visible. We - // should not attempt to register them by creating an offloading entry. 
- if (auto *GV = dyn_cast(CE->getAddress())) - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - continue; - - createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(), - Flags, CE->getLinkage()); - } else { - llvm_unreachable("Unsupported entry kind."); } - } + switch (Kind) { + case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for target region in " + "%0 is incorrect: either the " + "address or the ID is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + } break; + } + }; + + OMPBuilder.createOffloadEntriesAndInfoMetadata( + OffloadEntriesInfoManager, isTargetCodegen(), + CGM.getLangOpts().OpenMPIsDevice, + CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory(), ErrorReportFn); } /// Loads all the offload entries information from the host IR diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 92443b8b5c2b9..70c6b89d9b66f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -315,12 +315,6 @@ class CGOpenMPRuntime { explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator); - /// Creates offloading entry for the provided entry ID \a ID, - /// address \a Addr, size \a Size, and flags \a Flags. 
- virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage); - /// Helper to emit outlined function for 'target' directive. /// \param D Directive to emit. /// \param ParentName Name of the function that encloses the target region. @@ -713,6 +707,9 @@ class CGOpenMPRuntime { virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Returns true if the current target is a GPU. + virtual bool isTargetCodegen() const { return false; } + /// Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 320ee122a066c..9f2f60d892b02 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -840,33 +840,6 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, CGM.addCompilerUsedGlobal(GVMode); } -void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID, - llvm::Constant *Addr, - uint64_t Size, int32_t, - llvm::GlobalValue::LinkageTypes) { - // TODO: Add support for global variables on the device after declare target - // support. - llvm::Function *Fn = dyn_cast(Addr); - if (!Fn) - return; - - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); - - // Get "nvvm.annotations" metadata node. - llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); - - llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; - // Append metadata to nvvm.annotations. - MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); - - // Add a function attribute for the kernel. 
- Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel")); -} - void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 9e8130966735a..214f5e3d618aa 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -64,12 +64,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { // Base class overrides. // - /// Creates offloading entry for the provided entry ID \a ID, - /// address \a Addr, size \a Size, and flags \a Flags. - void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage) override; - /// Emit outlined function specialized for the Fork-Join /// programming model for applicable target directives on the NVPTX device. /// \param D Directive to emit. @@ -169,6 +163,8 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); void clear() override; + bool isTargetCodegen() const override { return true; }; + /// Declare generalized virtual functions which need to be defined /// by all specializations of OpenMPGPURuntime Targets like AMDGCN /// and NVPTX. 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 3f9fa6d3c8147..27d84bcb9f40a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -24,6 +24,7 @@ namespace llvm { class CanonicalLoopInfo; +struct TargetRegionEntryInfo; class OffloadEntriesInfoManager; /// Move the instruction after an InsertPoint to the beginning of another @@ -1093,6 +1094,37 @@ class OpenMPIRBuilder { bool EmitDebug = false, bool ForEndCall = false); + /// Creates offloading entry for the provided entry ID \a ID, + /// address \a Addr, size \a Size, and flags \a Flags. + void createOffloadEntry(bool IsTargetCodegen, Constant *ID, Constant *Addr, + uint64_t Size, int32_t Flags, + GlobalValue::LinkageTypes); + + /// The kind of errors that can occur when emitting the offload entries and + /// metadata. + enum EmitMetadataErrorKind { + EMIT_MD_TARGET_REGION_ERROR, + EMIT_MD_DECLARE_TARGET_ERROR, + EMIT_MD_GLOBAL_VAR_LINK_ERROR + }; + + /// Callback function type + using EmitMetadataErrorReportFunctionTy = + std::function; + + // Emit the offloading entries and metadata so that the device codegen side + // can easily figure out what to emit. The produced metadata looks like + // this: + // + // !omp_offload.info = !{!1, ...} + // + // We only generate metadata for function that contain target regions. 
+ void createOffloadEntriesAndInfoMetadata( + OffloadEntriesInfoManager &OffloadEntriesInfoManager, + bool IsTargetCodegen, bool IsEmbedded, + bool HasRequiresUnifiedSharedMemory, + EmitMetadataErrorReportFunctionTy &ErrorReportFunction); + public: /// Generator for __kmpc_copyprivate /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 5052ddc5dde88..89629583dc5c2 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4692,6 +4692,181 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } +void OpenMPIRBuilder::createOffloadEntry(bool IsTargetCodegen, Constant *ID, + Constant *Addr, uint64_t Size, + int32_t Flags, + GlobalValue::LinkageTypes) { + if (!IsTargetCodegen) { + emitOffloadingEntry(ID, Addr->getName(), Size, Flags); + return; + } + // TODO: Add support for global variables on the device after declare target + // support. + Function *Fn = dyn_cast(Addr); + if (!Fn) + return; + + Module &M = *(Fn->getParent()); + LLVMContext &Ctx = M.getContext(); + + // Get "nvvm.annotations" metadata node. + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + + Metadata *MDVals[] = { + ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))}; + // Append metadata to nvvm.annotations. + MD->addOperand(MDNode::get(Ctx, MDVals)); + + // Add a function attribute for the kernel. + Fn->addFnAttr(Attribute::get(Ctx, "kernel")); +} + +// We only generate metadata for function that contain target regions. +void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( + OffloadEntriesInfoManager &OffloadEntriesInfoManager, bool IsTargetCodegen, + bool IsEmbedded, bool HasRequiresUnifiedSharedMemory, + EmitMetadataErrorReportFunctionTy &ErrorFn) { + + // If there are no entries, we don't need to do anything. 
+ if (OffloadEntriesInfoManager.empty()) + return; + + LLVMContext &C = M.getContext(); + SmallVector, + 16> + OrderedEntries(OffloadEntriesInfoManager.size()); + + // Auxiliary methods to create metadata values and strings. + auto &&GetMDInt = [this](unsigned V) { + return ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), V)); + }; + + auto &&GetMDString = [&C](StringRef V) { return MDString::get(C, V); }; + + // Create the offloading info metadata node. + NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); + auto &&TargetRegionMetadataEmitter = + [this, &C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + const TargetRegionEntryInfo &EntryInfo, + const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &E) { + // Generate metadata for target regions. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (0). + // - Entry 1 -> Device ID of the file where the entry was identified. + // - Entry 2 -> File ID of the file where the entry was identified. + // - Entry 3 -> Mangled name of the function where the entry was + // identified. + // - Entry 4 -> Line in the file where the entry was identified. + // - Entry 5 -> Order the entry was created. + // The first element of the metadata node is the kind. + Metadata *Ops[] = { + GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID), + GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName), + GetMDInt(EntryInfo.Line), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo); + + // Add metadata to the named metadata node. 
+ MD->addOperand(MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( + TargetRegionMetadataEmitter); + + // Create function that emits metadata for each device global variable entry; + auto &&DeviceGlobalVarMetadataEmitter = + [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD]( + StringRef MangledName, + const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &E) { + // Generate metadata for global variables. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (1). + // - Entry 1 -> Mangled name of the variable. + // - Entry 2 -> Declare target kind. + // - Entry 3 -> Order the entry was created. + // The first element of the metadata node is the kind. + Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName), + GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0); + OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo); + + // Add metadata to the named metadata node. + MD->addOperand(MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( + DeviceGlobalVarMetadataEmitter); + + for (const auto &E : OrderedEntries) { + assert(E.first && "All ordered entries must exist!"); + if (const auto *CE = + dyn_cast( + E.first)) { + if (!CE->getID() || !CE->getAddress()) { + // Do not blame the entry if the parent funtion is not emitted. 
+ TargetRegionEntryInfo EntryInfo = E.second; + StringRef FnName = EntryInfo.ParentName; + if (!M.getNamedValue(FnName)) + continue; + ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo); + continue; + } + createOffloadEntry(IsTargetCodegen, CE->getID(), CE->getAddress(), + /*Size=*/0, CE->getFlags(), + GlobalValue::WeakAnyLinkage); + } else if (const auto *CE = dyn_cast< + OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>( + E.first)) { + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags = + static_cast( + CE->getFlags()); + switch (Flags) { + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo: { + if (IsEmbedded && HasRequiresUnifiedSharedMemory) + continue; + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second); + continue; + } + // The vaiable has no definition - no need to add the entry. + if (CE->getVarSize() == 0) + continue; + break; + } + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink: + assert(((IsEmbedded && !CE->getAddress()) || + (!IsEmbedded && CE->getAddress())) && + "Declaret target link address is set."); + if (IsEmbedded) + continue; + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo()); + continue; + } + break; + } + + // Hidden or internal symbols on the device are not externally visible. + // We should not attempt to register them by creating an offloading + // entry. 
+ if (auto *GV = dyn_cast(CE->getAddress())) + if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) + continue; + + createOffloadEntry(IsTargetCodegen, CE->getAddress(), CE->getAddress(), + CE->getVarSize(), Flags, CE->getLinkage()); + + } else { + llvm_unreachable("Unsupported entry kind."); + } + } +} + void TargetRegionEntryInfo::getTargetRegionEntryFnName( SmallVectorImpl &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line) { From 150fc73ddab65901e2f89c70af436706b859eef8 Mon Sep 17 00:00:00 2001 From: bipmis Date: Thu, 3 Nov 2022 14:32:07 +0000 Subject: [PATCH 146/516] [AggressiveInstCombine] Avoid load merge/widen if stores are present b/w loads This patch is to address the test cases in which the load has to be inserted at a right point. This happens when there is a store b/w the loads. This patch reverts the loads merge in all cases when stores are present b/w loads and will eventually be replaced with proper fix and test cases. Differential Revision: https://reviews.llvm.org/D137333 --- .../AggressiveInstCombine.cpp | 6 +- .../AggressiveInstCombine/AArch64/or-load.ll | 182 +++++++-------- .../AggressiveInstCombine/X86/or-load.ll | 208 ++++++++---------- 3 files changed, 173 insertions(+), 223 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 52d0defae63c9..4ce823f526637 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -729,15 +729,15 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1)) return false; - // Alias Analysis to check for store b/w the loads. + // TODO: Alias Analysis to check for stores b/w the loads. + // Currently bail out if there are stores b/w the loads. 
LoadInst *Start = LI1, *End = LI2; if (!LI1->comesBefore(LI2)) std::swap(Start, End); - MemoryLocation Loc = MemoryLocation::get(End); unsigned NumScanned = 0; for (Instruction &Inst : make_range(Start->getIterator(), End->getIterator())) { - if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc))) + if (Inst.mayWriteToMemory()) return false; if (++NumScanned > MaxInstrsToScan) return false; diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll index 24febe4ed8fbc..3815d682831d5 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -142,31 +142,26 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) { } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_alias( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 
[[O3]] +; ALL-LABEL: @loadCombine_4consecutive_alias( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -193,31 +188,26 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) { } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias_BE( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; LE-NEXT: 
[[S3:%.*]] = shl i32 [[E3]], 8 -; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; LE-NEXT: ret i32 [[O3]] -; -; BE-LABEL: @loadCombine_4consecutive_alias_BE( -; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: ret i32 [[L1]] +; ALL-LABEL: @loadCombine_4consecutive_alias_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1770,32 +1760,26 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_badinsert( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_badinsert( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr 
[[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: store i8 0, ptr [[P1]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_badinsert( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 0, ptr [[P1]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1822,32 +1806,26 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { } define i32 
@loadCombine_4consecutive_badinsert2(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_badinsert2( -; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3 -; LE-NEXT: store i8 0, ptr [[P3]], align 1 -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_badinsert2( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: store i8 0, ptr [[P3]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_badinsert2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P3]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 
[[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll index 7eb3fc31ba6d6..c8852376d1cac 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -150,31 +150,26 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) { } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_alias( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_alias( +; ALL-NEXT: 
[[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -201,31 +196,26 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) { } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias_BE( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], 
[[S2]] -; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; LE-NEXT: ret i32 [[O3]] -; -; BE-LABEL: @loadCombine_4consecutive_alias_BE( -; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: ret i32 [[L1]] +; ALL-LABEL: @loadCombine_4consecutive_alias_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1861,22 +1851,16 @@ define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) { } define i16 @loadCombine_2consecutive_badinsert(ptr %p) { -; LE-LABEL: @loadCombine_2consecutive_badinsert( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 -; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 -; LE-NEXT: ret i16 [[L1]] -; -; BE-LABEL: @loadCombine_2consecutive_badinsert( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: store i8 0, ptr [[P1]], align 1 -; 
BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 -; BE-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 -; BE-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] -; BE-NEXT: ret i16 [[O1]] +; ALL-LABEL: @loadCombine_2consecutive_badinsert( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P1]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 +; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] +; ALL-NEXT: ret i16 [[O1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %l2 = load i8, ptr %p1 @@ -1890,32 +1874,26 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_badinsert( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_badinsert( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: store i8 0, ptr [[P1]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 
[[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_badinsert( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 0, ptr [[P1]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1942,32 +1920,26 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_badinsert2( -; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3 -; LE-NEXT: store i8 0, ptr [[P3]], align 1 -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_badinsert2( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: store i8 0, ptr [[P3]], align 1 -; BE-NEXT: [[L3:%.*]] = 
load i8, ptr [[P2]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_badinsert2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P3]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 From 68b24c3b448946f86586be6e66af64cdce6edadc Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 11:25:42 +0100 Subject: [PATCH 147/516] [CVP] Simplify comparisons without constant operand CVP currently only tries to simplify comparisons if there is a constant operand. 
However, even if both are non-constant, we may be able to determine the result of the comparison based on range information. IPSCCP is already capable of doing this, but because it runs very early, it may miss some cases. Differential Revision: https://reviews.llvm.org/D137253 --- llvm/lib/Analysis/LazyValueInfo.cpp | 24 ++++++++++++++++--- .../Scalar/CorrelatedValuePropagation.cpp | 10 ++++---- .../CorrelatedValuePropagation/icmp.ll | 10 ++++---- .../CorrelatedValuePropagation/mul.ll | 3 +-- .../CorrelatedValuePropagation/shl.ll | 3 +-- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 2fae260e0d8fe..6f61091a41b98 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -1859,9 +1859,27 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned P, Value *LHS, return getPredicateAt(CmpInst::getSwappedPredicate(Pred), RHS, C, CxtI, UseBlockValue); - // Got two non-Constant values. While we could handle them somewhat, - // by getting their constant ranges, and applying ConstantRange::icmp(), - // so far it did not appear to be profitable. + // Got two non-Constant values. Try to determine the comparison results based + // on the block values of the two operands, e.g. because they have + // non-overlapping ranges. 
+ if (UseBlockValue) { + Module *M = CxtI->getModule(); + ValueLatticeElement L = + getImpl(PImpl, AC, M).getValueInBlock(LHS, CxtI->getParent(), CxtI); + if (L.isOverdefined()) + return LazyValueInfo::Unknown; + + ValueLatticeElement R = + getImpl(PImpl, AC, M).getValueInBlock(RHS, CxtI->getParent(), CxtI); + Type *Ty = CmpInst::makeCmpResultType(LHS->getType()); + if (Constant *Res = L.getCompare((CmpInst::Predicate)P, Ty, R, + M->getDataLayout())) { + if (Res->isNullValue()) + return LazyValueInfo::False; + if (Res->isOneValue()) + return LazyValueInfo::True; + } + } return LazyValueInfo::Unknown; } diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ad47fd7c6e9d1..ea887d982b1b7 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -340,18 +340,16 @@ static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) { /// exploiting range information. 
static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) { Value *Op0 = Cmp->getOperand(0); - auto *C = dyn_cast(Cmp->getOperand(1)); - if (!C) - return false; - + Value *Op1 = Cmp->getOperand(1); LazyValueInfo::Tristate Result = - LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp, + LVI->getPredicateAt(Cmp->getPredicate(), Op0, Op1, Cmp, /*UseBlockValue=*/true); if (Result == LazyValueInfo::Unknown) return false; ++NumCmps; - Constant *TorF = ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result); + Constant *TorF = + ConstantInt::get(CmpInst::makeCmpResultType(Op0->getType()), Result); Cmp->replaceAllUsesWith(TorF); Cmp->eraseFromParent(); return true; diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll index 4a3d9c752f04e..506aac79358f6 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -1180,10 +1180,8 @@ define void @non_const_range(i32 %a, i32 %b) { ; CHECK-NEXT: br i1 [[AND]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: [[A_100:%.*]] = add nuw nsw i32 [[A]], 100 -; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A_100]], [[B]] -; CHECK-NEXT: call void @check1(i1 [[CMP3]]) -; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[A_100]], [[B]] -; CHECK-NEXT: call void @check1(i1 [[CMP4]]) +; CHECK-NEXT: call void @check1(i1 true) +; CHECK-NEXT: call void @check1(i1 false) ; CHECK-NEXT: [[A_10:%.*]] = add nuw nsw i32 [[A]], 10 ; CHECK-NEXT: [[CMP5:%.*]] = icmp ne i32 [[A_10]], [[B]] ; CHECK-NEXT: call void @check1(i1 [[CMP5]]) @@ -1220,8 +1218,7 @@ define i1 @non_const_range_minmax(i8 %a, i8 %b) { ; CHECK-LABEL: @non_const_range_minmax( ; CHECK-NEXT: [[A2:%.*]] = call i8 @llvm.umin.i8(i8 [[A:%.*]], i8 10) ; CHECK-NEXT: [[B2:%.*]] = call i8 @llvm.umax.i8(i8 [[B:%.*]], i8 11) -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 [[A2]], [[B2]] -; CHECK-NEXT: ret i1 [[CMP1]] +; CHECK-NEXT: ret i1 true ; 
%a2 = call i8 @llvm.umin.i8(i8 %a, i8 10) %b2 = call i8 @llvm.umax.i8(i8 %b, i8 11) @@ -1229,6 +1226,7 @@ define i1 @non_const_range_minmax(i8 %a, i8 %b) { ret i1 %cmp1 } +; FIXME: Also support vectors. define <2 x i1> @non_const_range_minmax_vec(<2 x i8> %a, <2 x i8> %b) { ; CHECK-LABEL: @non_const_range_minmax_vec( ; CHECK-NEXT: [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll b/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll index 3ac26f4507099..c69d259984bd0 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll @@ -209,8 +209,7 @@ define i1 @nsw_range1(i8 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add nuw nsw i8 [[B:%.*]], -3 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i8 [[C]], 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[C]], [[MUL]] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; entry: %c = add nuw nsw i8 %b, -3 diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll b/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll index 98113cbdae36a..88311219dee58 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll @@ -412,8 +412,7 @@ define i1 @nsw_range1(i8 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add nuw nsw i8 [[B:%.*]], -3 ; CHECK-NEXT: [[SHL:%.*]] = shl nsw i8 [[C]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[C]], [[SHL]] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; entry: %c = add nuw nsw i8 %b, -3 From 791411a037b10d82a8c94e7aace277f644ef9360 Mon Sep 17 00:00:00 2001 From: zhijian Date: Thu, 3 Nov 2022 10:36:44 -0400 Subject: [PATCH 148/516] [XCOFF] change the decoding of External symbol's function auxiliary entry in XCOFF32 for llvm-readobj Summary: llvm-readobj decide whether to decode the external symbol's function auxiliary entry based on whether 
the symbol is a function or not (the current behavior). But XCOFFSymbolRef::isFunction() does not work properly when -ffunction-sections is enabled. We will no longer decode the function auxiliary entry based on XCOFFSymbolRef::isFunction(); instead, we will decode the function auxiliary entry based on the following: According to https://www.ibm.com/docs/en/aix/7.2?topic=formats-xcoff-object-file-format#XCOFF__c0f91ad419jbau in XCOFF32, there are only "one csect Auxiliary Entry" and "a function auxiliary symbol table entry" for the C_EXT, C_WEAKEXT, and C_HIDEXT symbols, and by convention, the csect auxiliary entry in an XCOFF32 file must be the last auxiliary entry for any external symbol that has more than one auxiliary entry (that means that, for the C_EXT, C_WEAKEXT, and C_HIDEXT symbols, if there is more than one auxiliary entry, we treat the last one as the csect auxiliary entry and the other auxiliary entries as function auxiliary entries). Reviewers: Hubert Tong, James Henderson Differential Revision: https://reviews.llvm.org/D136950 --- .../llvm-readobj/XCOFF/symbols-invalid.test | 71 ++++++++----------- .../tools/llvm-readobj/XCOFF/symbols.test | 61 ++++++++++++++++ llvm/tools/llvm-readobj/XCOFFDumper.cpp | 11 +-- 3 files changed, 90 insertions(+), 53 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test b/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test index e1b1aac29b4ed..3db8803149242 100644 --- a/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test +++ b/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test @@ -1,10 +1,5 @@ ## Test that we report warnings or dump raw data when symbols are invalid. -# RUN: yaml2obj %s --docnum=1 -o %t1 -# RUN: llvm-readobj --syms %t1 2>&1 | FileCheck %s -DFILE=%t1 --check-prefix=CASE1 - -# CASE1: warning: '[[FILE]]': the non-function C_EXT symbol at index 1 should have only 1 auxiliary entry, i.e.
the CSECT auxiliary entry - --- !XCOFF FileHeader: MagicNumber: 0x1DF @@ -14,55 +9,45 @@ Symbols: StorageClass: [[STORAGECLASS='C_EXT']] NumberOfAuxEntries: 2 -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_WEAKEXT' -o %t2 +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_STAT' -o %t1 +# RUN: llvm-readobj --syms %t1 2>&1 | FileCheck %s -DFILE=%t1 --check-prefix=CASE1 + +# CASE1: warning: '[[FILE]]': the C_STAT symbol at index 1 should not have more than 1 auxiliary entry + +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_DWARF' -o %t2 # RUN: llvm-readobj --syms %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=CASE2 -# CASE2: warning: '[[FILE]]': the non-function C_WEAKEXT symbol at index 1 should have only 1 auxiliary entry, i.e. the CSECT auxiliary entry +# CASE2: warning: '[[FILE]]': the C_DWARF symbol at index 1 should not have more than 1 auxiliary entry -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_HIDEXT' -o %t3 +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_BLOCK' -o %t3 # RUN: llvm-readobj --syms %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=CASE3 -# CASE3: warning: '[[FILE]]': the non-function C_HIDEXT symbol at index 1 should have only 1 auxiliary entry, i.e. 
the CSECT auxiliary entry +# CASE3: warning: '[[FILE]]': the C_BLOCK symbol at index 1 should not have more than 1 auxiliary entry -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_STAT' -o %t4 +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_FCN' -o %t4 # RUN: llvm-readobj --syms %t4 2>&1 | FileCheck %s -DFILE=%t4 --check-prefix=CASE4 -# CASE4: warning: '[[FILE]]': the C_STAT symbol at index 1 should not have more than 1 auxiliary entry - -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_DWARF' -o %t5 -# RUN: llvm-readobj --syms %t5 2>&1 | FileCheck %s -DFILE=%t5 --check-prefix=CASE5 - -# CASE5: warning: '[[FILE]]': the C_DWARF symbol at index 1 should not have more than 1 auxiliary entry - -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_BLOCK' -o %t6 -# RUN: llvm-readobj --syms %t6 2>&1 | FileCheck %s -DFILE=%t6 --check-prefix=CASE6 - -# CASE6: warning: '[[FILE]]': the C_BLOCK symbol at index 1 should not have more than 1 auxiliary entry - -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_FCN' -o %t7 -# RUN: llvm-readobj --syms %t7 2>&1 | FileCheck %s -DFILE=%t7 --check-prefix=CASE7 - -# CASE7: warning: '[[FILE]]': the C_FCN symbol at index 1 should not have more than 1 auxiliary entry +# CASE4: warning: '[[FILE]]': the C_FCN symbol at index 1 should not have more than 1 auxiliary entry ## This case tests the raw data output ability when a file auxiliary entry does ## not have the matching auxiliary type. 
-# RUN: yaml2obj %s --docnum=2 -o %t8 -# RUN: llvm-readobj --syms %t8 | FileCheck %s --strict-whitespace --match-full-lines --check-prefix=CASE8 - -# CASE8:Symbols [ -# CASE8-NEXT: Symbol { -# CASE8-NEXT: Index: 0 -# CASE8-NEXT: Name: .fun -# CASE8-NEXT: Value (SymbolTableIndex): 0x0 -# CASE8-NEXT: Section: N_UNDEF -# CASE8-NEXT: Source Language ID: TB_C (0x0) -# CASE8-NEXT: CPU Version ID: 0x0 -# CASE8-NEXT: StorageClass: C_FILE (0x67) -# CASE8-NEXT: NumberOfAuxEntries: 1 -# CASE8-NEXT: !Unexpected raw auxiliary entry data: -# CASE8-NEXT: 00000000 00000001 00020300 00000000 00fb -# CASE8-NEXT: } -# CASE8-NEXT:] +# RUN: yaml2obj %s --docnum=2 -o %t5 +# RUN: llvm-readobj --syms %t5 | FileCheck %s --strict-whitespace --match-full-lines --check-prefix=CASE5 + +# CASE5:Symbols [ +# CASE5-NEXT: Symbol { +# CASE5-NEXT: Index: 0 +# CASE5-NEXT: Name: .fun +# CASE5-NEXT: Value (SymbolTableIndex): 0x0 +# CASE5-NEXT: Section: N_UNDEF +# CASE5-NEXT: Source Language ID: TB_C (0x0) +# CASE5-NEXT: CPU Version ID: 0x0 +# CASE5-NEXT: StorageClass: C_FILE (0x67) +# CASE5-NEXT: NumberOfAuxEntries: 1 +# CASE5-NEXT: !Unexpected raw auxiliary entry data: +# CASE5-NEXT: 00000000 00000001 00020300 00000000 00fb +# CASE5-NEXT: } +# CASE5-NEXT:] --- !XCOFF FileHeader: diff --git a/llvm/test/tools/llvm-readobj/XCOFF/symbols.test b/llvm/test/tools/llvm-readobj/XCOFF/symbols.test index f72144c6f1a06..72ec8967cc957 100644 --- a/llvm/test/tools/llvm-readobj/XCOFF/symbols.test +++ b/llvm/test/tools/llvm-readobj/XCOFF/symbols.test @@ -134,6 +134,33 @@ Symbols: LineNumHi: 2 LineNumLo: 3 +##The C_WEAKEXT symbol with two Function auxiliary entries and a CSECT auxiliary entry. 
+ - Name: .fun7 + Value: 0x0 + Section: .text + Type: 0x00 + StorageClass: C_WEAKEXT + NumberOfAuxEntries: 3 + AuxEntries: + - Type: AUX_FCN + OffsetToExceptionTbl: 2 + SizeOfFunction: 3 + SymIdxOfNextBeyond: 4 + PtrToLineNum: 5 + - Type: AUX_FCN + OffsetToExceptionTbl: 8 + SizeOfFunction: 5 + SymIdxOfNextBeyond: 8 + PtrToLineNum: 5 + - Type: AUX_CSECT + ParameterHashIndex: 11 + TypeChkSectNum: 22 + SymbolAlignmentAndType: 33 + StorageMappingClass: XMC_PR + SectionOrLength: 256 + StabInfoIndex: 44 + StabSectNum: 55 + # SYMBOL32: Symbols [ # SYMBOL32-NEXT: Symbol { # SYMBOL32-NEXT: Index: 0 @@ -304,4 +331,38 @@ Symbols: # SYMBOL32-NEXT: LineNumber (Low 2 Bytes): 0x3 # SYMBOL32-NEXT: } # SYMBOL32-NEXT: } +# SYMBOL32-NEXT: Symbol { +# SYMBOL32-NEXT: Index: 21 +# SYMBOL32-NEXT: Name: .fun7 +# SYMBOL32-NEXT: Value (RelocatableAddress): 0x0 +# SYMBOL32-NEXT: Section: .text +# SYMBOL32-NEXT: Type: 0x0 +# SYMBOL32-NEXT: StorageClass: C_WEAKEXT (0x6F) +# SYMBOL32-NEXT: NumberOfAuxEntries: 3 +# SYMBOL32-NEXT: Function Auxiliary Entry { +# SYMBOL32-NEXT: Index: 22 +# SYMBOL32-NEXT: OffsetToExceptionTable: 0x2 +# SYMBOL32-NEXT: SizeOfFunction: 0x3 +# SYMBOL32-NEXT: PointerToLineNum: 0x5 +# SYMBOL32-NEXT: SymbolIndexOfNextBeyond: 4 +# SYMBOL32-NEXT: } +# SYMBOL32-NEXT: Function Auxiliary Entry { +# SYMBOL32-NEXT: Index: 23 +# SYMBOL32-NEXT: OffsetToExceptionTable: 0x8 +# SYMBOL32-NEXT: SizeOfFunction: 0x5 +# SYMBOL32-NEXT: PointerToLineNum: 0x5 +# SYMBOL32-NEXT: SymbolIndexOfNextBeyond: 8 +# SYMBOL32-NEXT: } +# SYMBOL32-NEXT: CSECT Auxiliary Entry { +# SYMBOL32-NEXT: Index: 24 +# SYMBOL32-NEXT: SectionLen: 256 +# SYMBOL32-NEXT: ParameterHashIndex: 0xB +# SYMBOL32-NEXT: TypeChkSectNum: 0x16 +# SYMBOL32-NEXT: SymbolAlignmentLog2: 4 +# SYMBOL32-NEXT: SymbolType: XTY_SD (0x1) +# SYMBOL32-NEXT: StorageMappingClass: XMC_PR (0x0) +# SYMBOL32-NEXT: StabInfoIndex: 0x2C +# SYMBOL32-NEXT: StabSectNum: 0x37 +# SYMBOL32-NEXT: } +# SYMBOL32-NEXT: } # SYMBOL32-NEXT: ] diff --git 
a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp index a2ca5b86f35e9..9e52f86a08fc9 100644 --- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp +++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp @@ -616,22 +616,13 @@ void XCOFFDumper::printSymbol(const SymbolRef &S) { case XCOFF::C_EXT: case XCOFF::C_WEAKEXT: case XCOFF::C_HIDEXT: { - if (!SymbolEntRef.isFunction() && NumberOfAuxEntries > 1) - reportUniqueWarning("the non-function " + - enumToString(static_cast(SymbolClass), - makeArrayRef(SymStorageClass)) + - " symbol at index " + Twine(SymbolIdx) + - " should have only 1 auxiliary entry, i.e. the CSECT " - "auxiliary entry"); - // For 32-bit objects, print the function auxiliary symbol table entry. The // last one must be a CSECT auxiliary entry. // For 64-bit objects, both a function auxiliary entry and an exception // auxiliary entry may appear, print them in the loop and skip printing the // CSECT auxiliary entry, which will be printed outside the loop. for (int I = 1; I <= NumberOfAuxEntries; I++) { - if ((I == NumberOfAuxEntries && !Obj.is64Bit()) || - !SymbolEntRef.isFunction()) + if (I == NumberOfAuxEntries && !Obj.is64Bit()) break; uintptr_t AuxAddress = XCOFFObjectFile::getAdvancedSymbolEntryAddress( From d42cfc4be1562ce7a887842493bf54c5b22e5857 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 15:46:23 +0100 Subject: [PATCH 149/516] [SimplifyCFG] Add tests for block speculation with assumes (NFC) --- llvm/test/Transforms/SimplifyCFG/assume.ll | 158 +++++++++++++++++++-- 1 file changed, 146 insertions(+), 12 deletions(-) diff --git a/llvm/test/Transforms/SimplifyCFG/assume.ll b/llvm/test/Transforms/SimplifyCFG/assume.ll index ef3e5376f3950..1091f74518a2d 100644 --- a/llvm/test/Transforms/SimplifyCFG/assume.ll +++ b/llvm/test/Transforms/SimplifyCFG/assume.ll @@ -1,21 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -simplifycfg 
-simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s -define void @test1() { - call void @llvm.assume(i1 0) - ret void +define void @assume_false_to_unreachable1() { +; CHECK-LABEL: @assume_false_to_unreachable1( +; CHECK-NEXT: unreachable +; + call void @llvm.assume(i1 0) + ret void -; CHECK-LABEL: @test1 -; CHECK-NOT: llvm.assume -; CHECK: unreachable } -define void @test2() { - call void @llvm.assume(i1 undef) - ret void +define void @assume_undef_to_unreachable() { +; CHECK-LABEL: @assume_undef_to_unreachable( +; CHECK-NEXT: unreachable +; + call void @llvm.assume(i1 undef) + ret void -; CHECK-LABEL: @test2 -; CHECK-NOT: llvm.assume -; CHECK: unreachable +} + +define i32 @speculate_block_with_assume_basic(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_basic( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[IF]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + %cmp = icmp ne i32 %x, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ 1, %if ] + ret i32 %phi +} + +define i32 @speculate_block_with_assume_extra_instr(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_extra_instr( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[ADD]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD]], [[IF]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + %add = add i32 %x, 1 + %cmp = 
icmp ne i32 %add, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ %add, %if ] + ret i32 %phi +} + +define i32 @speculate_block_with_assume_extra_instrs_too_many(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_extra_instrs_too_many( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[ADD2]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD2]], [[IF]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + %add = add i32 %x, 1 + %add2 = add i32 %add, 1 + %cmp = icmp ne i32 %add2, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ %add2, %if ] + ret i32 %phi +} + +define i32 @speculate_block_with_assume_extra_instrs_okay(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_extra_instrs_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[ADD2]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD]], [[IF]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + %add = add i32 %x, 1 + %add2 = add i32 %add, 1 + %cmp = icmp ne i32 %add2, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ %add, %if ] + ret i32 %phi +} + +define i32 @speculate_block_with_assume_operand_bundle(i1 %c, ptr %p) { +; CHECK-LABEL: 
@speculate_block_with_assume_operand_bundle( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[P:%.*]]) ] +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[IF]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + call void @llvm.assume(i1 true) ["nonnull"(ptr %p)] + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ 1, %if ] + ret i32 %phi } declare void @llvm.assume(i1) nounwind From 2ddcf721a0e8dafec5196001b2472480f0011887 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 10:54:16 +0100 Subject: [PATCH 150/516] [InstCombine] Perform memset -> load forwarding InstCombine does some basic store to load forwarding. One case it currently misses is the case where the store is actually a memset. This patch adds support for this case. This is a minimal implementation that only handles a load at the memset base address, without an offset. GVN is already capable of performing this optimization. Having it in InstCombine can help with phase ordering issues, similar to the existing store to load forwarding. Differential Revision: https://reviews.llvm.org/D137323 --- llvm/lib/Analysis/Loads.cpp | 33 +++++++++++++++++ .../InstCombine/load-store-forward.ll | 36 ++++++++++--------- 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 9eff2b161185e..93faefa947a3e 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -513,6 +513,39 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr, return ConstantFoldLoadFromConst(C, AccessTy, DL); } + if (auto *MSI = dyn_cast(Inst)) { + // Don't forward from (non-atomic) memset to atomic load. + if (AtLeastAtomic) + return nullptr; + + // Only handle constant memsets. 
+ auto *Val = dyn_cast(MSI->getValue()); + auto *Len = dyn_cast(MSI->getLength()); + if (!Val || !Len) + return nullptr; + + // TODO: Handle offsets. + Value *Dst = MSI->getDest(); + if (!AreEquivalentAddressValues(Dst, Ptr)) + return nullptr; + + if (IsLoadCSE) + *IsLoadCSE = false; + + // Make sure the read bytes are contained in the memset. + TypeSize LoadSize = DL.getTypeSizeInBits(AccessTy); + if (LoadSize.isScalable() || + (Len->getValue() * 8).ult(LoadSize.getFixedSize())) + return nullptr; + + APInt Splat = APInt::getSplat(LoadSize.getFixedSize(), Val->getValue()); + ConstantInt *SplatC = ConstantInt::get(MSI->getContext(), Splat); + if (CastInst::isBitOrNoopPointerCastable(SplatC->getType(), AccessTy, DL)) + return SplatC; + + return nullptr; + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index d90af935c65e5..5a847cd68db84 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -257,8 +257,7 @@ define i1 @load_i1_store_i8(ptr %a) { define i32 @load_after_memset_0(ptr %a) { ; CHECK-LABEL: @load_after_memset_0( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: ret i32 [[V]] +; CHECK-NEXT: ret i32 0 ; call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) %v = load i32, ptr %a @@ -268,8 +267,7 @@ define i32 @load_after_memset_0(ptr %a) { define float @load_after_memset_0_float(ptr %a) { ; CHECK-LABEL: @load_after_memset_0_float( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load float, ptr [[A]], align 4 -; CHECK-NEXT: ret float [[V]] +; CHECK-NEXT: ret float 0.000000e+00 ; call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, 
i1 false) %v = load float, ptr %a @@ -279,8 +277,7 @@ define float @load_after_memset_0_float(ptr %a) { define i27 @load_after_memset_0_non_byte_sized(ptr %a) { ; CHECK-LABEL: @load_after_memset_0_non_byte_sized( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load i27, ptr [[A]], align 4 -; CHECK-NEXT: ret i27 [[V]] +; CHECK-NEXT: ret i27 0 ; call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) %v = load i27, ptr %a @@ -290,8 +287,7 @@ define i27 @load_after_memset_0_non_byte_sized(ptr %a) { define <4 x i8> @load_after_memset_0_vec(ptr %a) { ; CHECK-LABEL: @load_after_memset_0_vec( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load <4 x i8>, ptr [[A]], align 4 -; CHECK-NEXT: ret <4 x i8> [[V]] +; CHECK-NEXT: ret <4 x i8> zeroinitializer ; call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) %v = load <4 x i8>, ptr %a @@ -301,8 +297,7 @@ define <4 x i8> @load_after_memset_0_vec(ptr %a) { define i32 @load_after_memset_1(ptr %a) { ; CHECK-LABEL: @load_after_memset_1( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: ret i32 [[V]] +; CHECK-NEXT: ret i32 16843009 ; call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) %v = load i32, ptr %a @@ -312,8 +307,7 @@ define i32 @load_after_memset_1(ptr %a) { define float @load_after_memset_1_float(ptr %a) { ; CHECK-LABEL: @load_after_memset_1_float( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load float, ptr [[A]], align 4 -; CHECK-NEXT: ret float [[V]] +; CHECK-NEXT: ret float 0x3820202020000000 ; call void @llvm.memset.p0.i64(ptr %a, i8 
1, i64 16, i1 false) %v = load float, ptr %a @@ -323,8 +317,7 @@ define float @load_after_memset_1_float(ptr %a) { define i27 @load_after_memset_1_non_byte_sized(ptr %a) { ; CHECK-LABEL: @load_after_memset_1_non_byte_sized( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load i27, ptr [[A]], align 4 -; CHECK-NEXT: ret i27 [[V]] +; CHECK-NEXT: ret i27 16843009 ; call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) %v = load i27, ptr %a @@ -334,8 +327,7 @@ define i27 @load_after_memset_1_non_byte_sized(ptr %a) { define <4 x i8> @load_after_memset_1_vec(ptr %a) { ; CHECK-LABEL: @load_after_memset_1_vec( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) -; CHECK-NEXT: [[V:%.*]] = load <4 x i8>, ptr [[A]], align 4 -; CHECK-NEXT: ret <4 x i8> [[V]] +; CHECK-NEXT: ret <4 x i8> ; call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) %v = load <4 x i8>, ptr %a @@ -353,6 +345,7 @@ define i32 @load_after_memset_unknown(ptr %a, i8 %byte) { ret i32 %v } +; TODO: Handle load at offset. 
define i32 @load_after_memset_0_offset(ptr %a) { ; CHECK-LABEL: @load_after_memset_0_offset( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) @@ -416,6 +409,17 @@ define i256 @load_after_memset_0_too_small(ptr %a) { ret i256 %v } +define i129 @load_after_memset_0_too_small_by_one_bit(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_too_small_by_one_bit( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i129, ptr [[A]], align 4 +; CHECK-NEXT: ret i129 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i129, ptr %a + ret i129 %v +} + define i32 @load_after_memset_0_unknown_length(ptr %a, i64 %len) { ; CHECK-LABEL: @load_after_memset_0_unknown_length( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[A:%.*]], i8 0, i64 [[LEN:%.*]], i1 false) From d1cee3539fdf766e544fa783e7db1cd9ec8de801 Mon Sep 17 00:00:00 2001 From: LiDongjin Date: Tue, 18 Oct 2022 23:28:34 +0800 Subject: [PATCH 151/516] [LoopVectorize] Fix crash on "Cannot dereference end iterator!"(PR56627) Check hasOneUser before user_back(). 
Differential Revision: https://reviews.llvm.org/D136227 --- .../Transforms/Vectorize/LoopVectorize.cpp | 5 ++-- .../LoopVectorize/AArch64/pr56627.ll | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/pr56627.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7983165f09842..0a01b5f90182e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6561,10 +6561,9 @@ Optional LoopVectorizationCostModel::getReductionPatternCost( return None; RetI = RetI->user_back(); } - if (match(RetI, m_Mul(m_Value(), m_Value())) && + + if (match(RetI, m_OneUse(m_Mul(m_Value(), m_Value()))) && RetI->user_back()->getOpcode() == Instruction::Add) { - if (!RetI->hasOneUser()) - return None; RetI = RetI->user_back(); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr56627.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr56627.ll new file mode 100644 index 0000000000000..40fbd3ed04cc0 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr56627.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -S -passes=loop-vectorize | FileCheck %s + +; Check that we can vectorize this loop without crashing. + +target triple = "aarch64-none-linux-gnu" +define float @quux() { +; CHECK: @quux +bb: + br label %bb1 + +bb1: + %tmp = phi i64 [ %tmp3, %bb1 ], [ 0, %bb ] + %tmp2 = phi float [ %tmp5, %bb1 ], [ 0.000000e+00, %bb ] + %tmp3 = add nsw i64 %tmp, 1 + %tmp5 = fadd float %tmp2, 3.000000e+00 + %tmp6 = mul i32 0, 0 + %tmp7 = icmp sgt i64 %tmp, 0 + br i1 %tmp7, label %bb8, label %bb1 + +bb8: + %tmp9 = phi float [ %tmp5, %bb1 ] + ret float %tmp9 +} \ No newline at end of file From 3edd18876950693cbc69edda429f223616e3c052 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 3 Nov 2022 15:18:36 +0000 Subject: [PATCH 152/516] [AArch64] Add a baseline test for fp16 target intrinsics. 
NFC --- clang/test/Sema/aarch64-fp16-target.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 clang/test/Sema/aarch64-fp16-target.c diff --git a/clang/test/Sema/aarch64-fp16-target.c b/clang/test/Sema/aarch64-fp16-target.c new file mode 100644 index 0000000000000..13cee64c52f56 --- /dev/null +++ b/clang/test/Sema/aarch64-fp16-target.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -fsyntax-only -verify -emit-llvm -o - %s +// REQUIRES: aarch64-registered-target + +// Test that functions with the correct target attributes can use the correct FP16 intrinsics. + +#include + +__attribute__((target("fullfp16"))) +void test_fullfp16(float16_t f16) { + vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} +} + +__attribute__((target("arch=armv8-a+fp16"))) +void test_fp16_arch(float16_t f16) { + vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} +} + +__attribute__((target("+fp16"))) +void test_fp16(float16_t f16) { + vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} +} + +void undefined(float16_t f16) { + vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} +} From 5073ae2a883f2c1dc2d4e81d9f7ed7ffae8d8ba4 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 3 Nov 2022 15:33:07 +0000 Subject: [PATCH 153/516] [AMDGPU] Fix duplicated words in comments --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 4 ++-- llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index dfa3c0af6526c..79dc60c93f403 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5035,7 +5035,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // s16 -> <2 x s16>, and <3 x s16> -> <4 x s16>, LLT RoundedTy; - // S32 vector to to cover all data, plus TFE result element. + // S32 vector to cover all data, plus TFE result element. LLT TFETy; // Register type to use for each loaded component. Will be S32 or V2S16. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 24ef9fdb7b8cf..eb947a17ef9ec 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10375,7 +10375,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine( // If it's free to do so, push canonicalizes further up the source, which may // find a canonical source. // - // TODO: More opcodes. Note this is unsafe for the the _ieee minnum/maxnum for + // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for // sNaNs. if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) { auto *CRHS = dyn_cast(N0.getOperand(1)); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index b80838c393fcc..05589010654aa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -723,7 +723,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { } /// \returns true if this is an s_store_dword* instruction. This is more - /// specific than than isSMEM && mayStore. + /// specific than isSMEM && mayStore. 
static bool isScalarStore(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index f603244086a9c..19b74ce319fa1 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -358,7 +358,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // as the input registers. Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; - // This is the the unswizzled offset from the current dispatch's scratch wave + // This is the unswizzled offset from the current dispatch's scratch wave // base to the beginning of the current function's frame. Register FrameOffsetReg = AMDGPU::FP_REG; @@ -462,7 +462,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // VGPR used for SGPR spills Register VGPR; - // If the VGPR is is used for SGPR spills in a non-entrypoint function, the + // If the VGPR is used for SGPR spills in a non-entrypoint function, the // stack slot used to save/restore it in the prolog/epilog. Optional FI; diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp index 2ae3157bab490..ae2c10116de85 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp @@ -36,7 +36,7 @@ /// the instructions in bb.then will only overwrite lanes that will never be /// accessed in bb.else. /// -/// This pass aims to to tell register allocator that %a is in-fact dead, +/// This pass aims to tell register allocator that %a is in-fact dead, /// through inserting a phi-node in bb.flow saying that %a is undef when coming /// from bb.then, and then replace the uses in the bb.else with the result of /// newly inserted phi. 
From 13bd41096286305ee603428f6adf161f52981827 Mon Sep 17 00:00:00 2001 From: rkayaith Date: Tue, 18 Oct 2022 14:44:11 -0400 Subject: [PATCH 154/516] [mlir][Pass] Include anchor op in -pass-pipeline In D134622 the printed form of a pass manager is changed to include the name of the op that the pass manager is anchored on. This updates the `-pass-pipeline` argument format to include the anchor op as well, so that the printed form of a pipeline can be directly passed to `-pass-pipeline`. In most cases this requires updating `-pass-pipeline='pipeline'` to `-pass-pipeline='builtin.module(pipeline)'`. This also fixes an outdated assert that prevented running a `PassManager` anchored on `'any'`. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D134900 --- flang/test/Fir/cg-ops.fir | 2 +- mlir/docs/PassManagement.md | 14 +++++++------- mlir/include/mlir/Pass/PassManager.h | 1 + mlir/lib/Pass/Pass.cpp | 14 ++++++++++---- mlir/lib/Pass/PassRegistry.cpp | 4 +++- .../Analysis/test-alias-analysis-modref.mlir | 2 +- mlir/test/Analysis/test-alias-analysis.mlir | 2 +- mlir/test/Analysis/test-dominance.mlir | 2 +- mlir/test/Analysis/test-foo-analysis.mlir | 2 +- mlir/test/Analysis/test-liveness.mlir | 2 +- mlir/test/Analysis/test-match-reduction.mlir | 2 +- mlir/test/Analysis/test-topoligical-sort.mlir | 2 +- .../AffineToStandard/lower-affine-gpu.mlir | 2 +- .../Conversion/ArithToLLVM/arith-to-llvm.mlir | 2 +- .../ArithToLLVM/convert-nd-vector-to-llvmir.mlir | 2 +- .../ComplexToStandard/full-conversion.mlir | 2 +- mlir/test/Conversion/FuncToLLVM/func-memref.mlir | 4 ++-- .../test/Conversion/FuncToLLVM/func-to-llvm.mlir | 4 ++-- .../Conversion/MathToFuncs/math-to-funcs.mlir | 2 +- .../test/Conversion/MathToLLVM/math-to-llvm.mlir | 2 +- .../MemRefToLLVM/generic-functions.mlir | 4 ++-- .../SCFToGPU/no_blocks_no_threads.mlir | 4 ++-- mlir/test/Conversion/SCFToGPU/step_one.mlir | 4 ++-- mlir/test/Conversion/SCFToGPU/step_positive.mlir | 2 +- 
.../convert-shape-constraints.mlir | 2 +- .../TosaToLinalg/tosa-to-linalg-named.mlir | 2 +- .../TosaToLinalg/tosa-to-linalg-resize.mlir | 2 +- .../Conversion/TosaToLinalg/tosa-to-linalg.mlir | 2 +- .../VectorToGPU/vector-to-mma-ops-mma-sync.mlir | 4 ++-- .../VectorToGPU/vector-to-mma-ops.mlir | 2 +- .../VectorToSCF/tensor-transfer-ops.mlir | 2 +- .../unrolled-tensor-transfer-ops.mlir | 2 +- .../VectorToSCF/unrolled-vector-to-loops.mlir | 2 +- .../vector-to-scf-mask-and-permutation-map.mlir | 2 +- .../Conversion/VectorToSCF/vector-to-scf.mlir | 4 ++-- mlir/test/Dialect/Affine/canonicalize.mlir | 2 +- mlir/test/Dialect/Affine/loop-unswitch.mlir | 2 +- .../Affine/memref-stride-calculation.mlir | 2 +- mlir/test/Dialect/ControlFlow/canonicalize.mlir | 2 +- mlir/test/Dialect/GPU/promotion.mlir | 2 +- mlir/test/Dialect/LLVMIR/terminator.mlir | 2 +- .../Linalg/convert-elementwise-to-linalg.mlir | 2 +- mlir/test/Dialect/Linalg/detensorize_0d.mlir | 2 +- .../Dialect/Linalg/detensorize_br_operands.mlir | 2 +- mlir/test/Dialect/Linalg/detensorize_if.mlir | 2 +- .../test/Dialect/Linalg/detensorize_trivial.mlir | 4 ++-- mlir/test/Dialect/Linalg/detensorize_while.mlir | 4 ++-- .../Linalg/detensorize_while_impure_cf.mlir | 4 ++-- .../Linalg/detensorize_while_pure_cf.mlir | 2 +- .../Dialect/Linalg/drop-unit-extent-dims.mlir | 2 +- .../Dialect/Linalg/fold-unit-trip-loops.mlir | 2 +- .../Dialect/NVGPU/optimize-shared-memory.mlir | 2 +- mlir/test/Dialect/Quant/canonicalize.mlir | 2 +- mlir/test/Dialect/SCF/canonicalize.mlir | 2 +- .../test/Dialect/SCF/for-loop-to-while-loop.mlir | 2 +- mlir/test/Dialect/SCF/loop-range.mlir | 2 +- mlir/test/Dialect/SCF/parallel-loop-fusion.mlir | 2 +- .../SCF/parallel-loop-tiling-inbound-check.mlir | 2 +- mlir/test/Dialect/SCF/parallel-loop-tiling.mlir | 2 +- .../Dialect/SPIRV/Transforms/canonicalize.mlir | 2 +- mlir/test/Dialect/SPIRV/Transforms/inlining.mlir | 2 +- mlir/test/Dialect/Vector/canonicalize.mlir | 2 +- 
mlir/test/IR/diagnostic-handler-filter.mlir | 2 +- mlir/test/IR/test-clone.mlir | 2 +- mlir/test/IR/test-matchers.mlir | 2 +- .../Linalg/CPU/test-one-shot-bufferize.mlir | 6 +++--- .../Standard/CPU/test-ceil-floor-pos-neg.mlir | 2 +- .../Vector/CPU/test-transfer-read-1d.mlir | 8 ++++---- .../Vector/CPU/test-transfer-read-2d.mlir | 8 ++++---- .../Vector/CPU/test-transfer-read-3d.mlir | 8 ++++---- .../Dialect/Vector/CPU/test-transfer-read.mlir | 4 ++-- .../Vector/CPU/test-transfer-to-loops.mlir | 4 ++-- .../GPU/CUDA/test-reduction-distribute.mlir | 2 +- .../Vector/GPU/CUDA/test-warp-distribute.mlir | 6 +++--- .../GPU/CUDA/TensorCore/wmma-matmul-f16.mlir | 2 +- .../GPU/CUDA/TensorCore/wmma-matmul-f32.mlir | 2 +- .../Integration/GPU/CUDA/all-reduce-and.mlir | 2 +- .../Integration/GPU/CUDA/all-reduce-max.mlir | 2 +- .../Integration/GPU/CUDA/all-reduce-min.mlir | 2 +- .../test/Integration/GPU/CUDA/all-reduce-op.mlir | 2 +- .../test/Integration/GPU/CUDA/all-reduce-or.mlir | 2 +- .../Integration/GPU/CUDA/all-reduce-region.mlir | 2 +- .../Integration/GPU/CUDA/all-reduce-xor.mlir | 2 +- mlir/test/Integration/GPU/CUDA/async.mlir | 2 +- mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir | 2 +- .../GPU/CUDA/multiple-all-reduce.mlir | 2 +- mlir/test/Integration/GPU/CUDA/shuffle.mlir | 2 +- mlir/test/Integration/GPU/CUDA/two-modules.mlir | 2 +- mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir | 2 +- mlir/test/Integration/GPU/ROCM/printf.mlir | 2 +- mlir/test/Integration/GPU/ROCM/two-modules.mlir | 2 +- mlir/test/Integration/GPU/ROCM/vecadd.mlir | 2 +- .../Integration/GPU/ROCM/vector-transferops.mlir | 2 +- .../Pass/crash-recovery-dynamic-failure.mlir | 2 +- mlir/test/Pass/crash-recovery.mlir | 6 +++--- .../Pass/dynamic-pipeline-fail-on-parent.mlir | 2 +- mlir/test/Pass/dynamic-pipeline-nested.mlir | 4 ++-- mlir/test/Pass/dynamic-pipeline.mlir | 8 ++++---- mlir/test/Pass/generic-pipeline.mlir | 2 +- mlir/test/Pass/interface-pass.mlir | 2 +- mlir/test/Pass/invalid-interface-pass.mlir 
| 2 +- .../Pass/invalid-ir-print-after-failure.mlir | 12 ++++++------ mlir/test/Pass/invalid-parent.mlir | 2 +- mlir/test/Pass/invalid-pass.mlir | 2 +- mlir/test/Pass/ir-printing.mlir | 14 +++++++------- mlir/test/Pass/pass-timing.mlir | 8 ++++---- mlir/test/Pass/pipeline-options-parsing.mlir | 14 +++++++------- mlir/test/Pass/pipeline-parsing.mlir | 16 +++++++++++----- mlir/test/Pass/pipeline-stats.mlir | 4 ++-- mlir/test/Target/LLVMIR/arm-neon-2d.mlir | 2 +- mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir | 2 +- .../Transforms/canonicalize-block-merge.mlir | 2 +- mlir/test/Transforms/canonicalize-dce.mlir | 2 +- mlir/test/Transforms/canonicalize-td.mlir | 4 ++-- mlir/test/Transforms/canonicalize.mlir | 2 +- mlir/test/Transforms/cse.mlir | 2 +- .../Transforms/parallel-loop-collapsing.mlir | 2 +- mlir/test/Transforms/parametric-mapping.mlir | 2 +- mlir/test/Transforms/sccp-callgraph.mlir | 4 ++-- mlir/test/Transforms/sccp-structured.mlir | 2 +- mlir/test/Transforms/sccp.mlir | 2 +- .../single-parallel-loop-collapsing.mlir | 2 +- .../Transforms/test-canonicalize-filter.mlir | 6 +++--- mlir/test/Transforms/test-canonicalize.mlir | 4 ++-- .../test-operation-folder-commutative.mlir | 2 +- mlir/test/Transforms/test-symbol-dce.mlir | 2 +- mlir/test/mlir-cpu-runner/async-error.mlir | 2 +- mlir/test/mlir-cpu-runner/async-group.mlir | 2 +- mlir/test/mlir-cpu-runner/async-value.mlir | 2 +- mlir/test/mlir-cpu-runner/async.mlir | 2 +- .../test/mlir-cpu-runner/bare-ptr-call-conv.mlir | 2 +- mlir/test/mlir-cpu-runner/copy.mlir | 2 +- mlir/test/mlir-cpu-runner/global-memref.mlir | 2 +- .../mlir-cpu-runner/math-polynomial-approx.mlir | 2 +- .../mlir-cpu-runner/memref-reinterpret-cast.mlir | 2 +- mlir/test/mlir-cpu-runner/memref-reshape.mlir | 2 +- mlir/test/mlir-cpu-runner/print.mlir | 2 +- .../mlir-cpu-runner/sgemm-naive-codegen.mlir | 2 +- mlir/test/mlir-cpu-runner/unranked-memref.mlir | 2 +- mlir/test/mlir-cpu-runner/utils.mlir | 8 ++++---- mlir/test/mlir-opt/async.mlir | 2 +- 
141 files changed, 228 insertions(+), 213 deletions(-) diff --git a/flang/test/Fir/cg-ops.fir b/flang/test/Fir/cg-ops.fir index 6fcaa5c3d6066..c8c666a62a3d2 100644 --- a/flang/test/Fir/cg-ops.fir +++ b/flang/test/Fir/cg-ops.fir @@ -1,4 +1,4 @@ -// RUN: fir-opt --split-input-file --pass-pipeline="cg-rewrite,cse" %s | FileCheck %s +// RUN: fir-opt --split-input-file --pass-pipeline="builtin.module(cg-rewrite,cse)" %s | FileCheck %s // CHECK-LABEL: func @codegen( // CHECK-SAME: %[[arg:.*]]: !fir diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index 7748448ba1df4..ed225a84afb7c 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -602,7 +602,7 @@ A pipeline view that models the structure of the pass manager, this is the default view: ```shell -$ mlir-opt -pass-pipeline='func.func(my-pass,my-pass)' foo.mlir -mlir-pass-statistics +$ mlir-opt -pass-pipeline='any(func.func(my-pass,my-pass))' foo.mlir -mlir-pass-statistics ===-------------------------------------------------------------------------=== ... Pass statistics report ... @@ -621,7 +621,7 @@ A list view that aggregates the statistics of all instances of a specific pass together: ```shell -$ mlir-opt -pass-pipeline='func.func(my-pass, my-pass)' foo.mlir -mlir-pass-statistics -mlir-pass-statistics-display=list +$ mlir-opt -pass-pipeline='any(func.func(my-pass,my-pass))' foo.mlir -mlir-pass-statistics -mlir-pass-statistics-display=list ===-------------------------------------------------------------------------=== ... Pass statistics report ... @@ -750,10 +750,10 @@ Can also be specified as (via the `-pass-pipeline` flag): ```shell # Anchor the cse and canonicalize passes on the `func.func` operation. 
-$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}' +$ mlir-opt foo.mlir -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1})' # Anchor the cse and canonicalize passes on "any" viable root operation. -$ mlir-opt foo.mlir -pass-pipeline='any(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}' +$ mlir-opt foo.mlir -pass-pipeline='builtin.module(any(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1})' ``` In order to support round-tripping a pass to the textual representation using @@ -1121,7 +1121,7 @@ pipeline. This display mode is available in mlir-opt via `-mlir-timing-display=list`. ```shell -$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing -mlir-timing-display=list +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing -mlir-timing-display=list ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -1146,7 +1146,7 @@ the most time, and can also be used to identify when analyses are being invalidated and recomputed. This is the default display mode. ```shell -$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -1177,7 +1177,7 @@ perceived time, or clock time, whereas the `User Time` will display the total cpu time. 
```shell -$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing +$ mlir-opt foo.mlir -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing ===-------------------------------------------------------------------------=== ... Pass execution timing report ... diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index ec858a7415ab1..058e2d738b388 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -75,6 +75,7 @@ class OpPassManager { OpPassManager(const OpPassManager &rhs); ~OpPassManager(); OpPassManager &operator=(const OpPassManager &rhs); + OpPassManager &operator=(OpPassManager &&rhs); /// Iterator over the passes in this pass manager. using pass_iterator = diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 0b229844cf874..edb5a53507724 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -290,12 +290,16 @@ OpPassManager::OpPassManager(StringRef name, Nesting nesting) : impl(new OpPassManagerImpl(name, nesting)) {} OpPassManager::OpPassManager(OperationName name, Nesting nesting) : impl(new OpPassManagerImpl(name, nesting)) {} -OpPassManager::OpPassManager(OpPassManager &&rhs) : impl(std::move(rhs.impl)) {} +OpPassManager::OpPassManager(OpPassManager &&rhs) { *this = std::move(rhs); } OpPassManager::OpPassManager(const OpPassManager &rhs) { *this = rhs; } OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { impl = std::make_unique(*rhs.impl); return *this; } +OpPassManager &OpPassManager::operator=(OpPassManager &&rhs) { + impl = std::move(rhs.impl); + return *this; +} OpPassManager::~OpPassManager() = default; @@ -773,9 +777,11 @@ void PassManager::enableVerifier(bool enabled) { verifyPasses = enabled; } /// Run the passes within this manager on the provided operation. 
LogicalResult PassManager::run(Operation *op) { MLIRContext *context = getContext(); - assert(op->getName() == getOpName(*context) && - "operation has a different name than the PassManager or is from a " - "different context"); + Optional anchorOp = getOpName(*context); + if (anchorOp && anchorOp != op->getName()) + return emitError(op->getLoc()) + << "can't run '" << getOpAnchorName() << "' pass manager on '" + << op->getName() << "' op"; // Register all dialects for the current pipeline. DialectRegistry dependentDialects; diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index 31b41153874a8..0ddb2e99ecfce 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -923,8 +923,10 @@ LogicalResult PassPipelineCLParser::addToPipeline( "' option can't be used with individual pass options"); std::string errMsg; llvm::raw_string_ostream os(errMsg); - if (failed(parsePassPipeline(passPipeline, pm, os))) + FailureOr parsed = parsePassPipeline(passPipeline, os); + if (failed(parsed)) return errorHandler(errMsg); + pm = std::move(*parsed); return success(); } diff --git a/mlir/test/Analysis/test-alias-analysis-modref.mlir b/mlir/test/Analysis/test-alias-analysis-modref.mlir index 7c5328cb7f750..eee8ae9049cfa 100644 --- a/mlir/test/Analysis/test-alias-analysis-modref.mlir +++ b/mlir/test/Analysis/test-alias-analysis-modref.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis-modref)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-alias-analysis-modref))' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s // CHECK-LABEL: Testing : "no_side_effects" // CHECK: alloc -> func.region0#0: NoModRef diff --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir index 0e19282dbc9e9..8cbee61c78b45 100644 --- a/mlir/test/Analysis/test-alias-analysis.mlir +++ 
b/mlir/test/Analysis/test-alias-analysis.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-alias-analysis))' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s // CHECK-LABEL: Testing : "simple" // CHECK-DAG: func.region0#0 <-> func.region0#1: MayAlias diff --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir index b7734151a516f..3c53193db7f72 100644 --- a/mlir/test/Analysis/test-dominance.mlir +++ b/mlir/test/Analysis/test-dominance.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-dominance)" -split-input-file 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-print-dominance))" -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_condBranch func.func @func_condBranch(%cond : i1) { diff --git a/mlir/test/Analysis/test-foo-analysis.mlir b/mlir/test/Analysis/test-foo-analysis.mlir index 7c5d07396a83f..83ac5b5796135 100644 --- a/mlir/test/Analysis/test-foo-analysis.mlir +++ b/mlir/test/Analysis/test-foo-analysis.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -pass-pipeline='func.func(test-foo-analysis)' %s 2>&1 | FileCheck %s +// RUN: mlir-opt -split-input-file -pass-pipeline='builtin.module(func.func(test-foo-analysis))' %s 2>&1 | FileCheck %s // CHECK-LABEL: function: @test_default_init func.func @test_default_init() -> () { diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir index 02179d6472d54..8ae3d09a6cd12 100644 --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-liveness)" -split-input-file 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-print-liveness))" 
-split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_empty func.func @func_empty() { diff --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir index ecc74c6cc686c..b5902db77e899 100644 --- a/mlir/test/Analysis/test-match-reduction.mlir +++ b/mlir/test/Analysis/test-match-reduction.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-match-reduction)" -verify-diagnostics -split-input-file +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-match-reduction))" -verify-diagnostics -split-input-file // Verify that the generic reduction detection utility works on different // dialects. diff --git a/mlir/test/Analysis/test-topoligical-sort.mlir b/mlir/test/Analysis/test-topoligical-sort.mlir index 9323c486f8a11..8608586402055 100644 --- a/mlir/test/Analysis/test-topoligical-sort.mlir +++ b/mlir/test/Analysis/test-topoligical-sort.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-topological-sort)" 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-print-topological-sort))" 2>&1 | FileCheck %s // CHECK-LABEL: Testing : region // CHECK: arith.addi {{.*}} : index diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir index 382c3359278e0..d88f71eac4458 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="gpu.module(lower-affine)" %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(lower-affine))" %s | FileCheck %s #map0gpufunc = affine_map<(d0) -> (d0)> gpu.module @kernels { diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index f2ef7081e2c6f..eccd8755d7aa8 100644 --- 
a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm))" %s -split-input-file | FileCheck %s // CHECK-LABEL: @vector_ops func.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> { diff --git a/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir b/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir index 7c219578baedd..63989347567b5 100644 --- a/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir +++ b/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm))" %s -split-input-file | FileCheck %s // CHECK-LABEL: @vec_bin func.func @vec_bin(%arg0: vector<2x2x2xf32>) -> vector<2x2x2xf32> { diff --git a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir index f6d023b92aab9..9983dd46f0943 100644 --- a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir +++ b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard),convert-complex-to-llvm,func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-complex-to-standard),convert-complex-to-llvm,func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" | FileCheck %s // CHECK-LABEL: llvm.func @complex_abs // 
CHECK-SAME: %[[ARG:.*]]: ![[C_TY:.*]]) diff --git a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir index 48a319b3ee283..10b205fc1a35d 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir +++ b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" -split-input-file %s | FileCheck %s -// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR // BAREPTR-LABEL: func @check_noalias // BAREPTR-SAME: %{{.*}}: !llvm.ptr {llvm.noalias}, %{{.*}}: !llvm.ptr {llvm.noalias} diff --git a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir index 481067cf8d915..5624bf12e72d7 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir +++ b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" %s -split-input-file | FileCheck %s -// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s +// RUN: mlir-opt 
-pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" %s -split-input-file | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts)" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: func @empty() { // CHECK-NEXT: llvm.return diff --git a/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir b/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir index af0f49254a055..8bc7f7f52ce57 100644 --- a/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir +++ b/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="convert-math-to-funcs" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(convert-math-to-funcs)" | FileCheck %s // ----- diff --git a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir index 7f4b9634de3ba..bcdbad1709e93 100644 --- a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir +++ b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(convert-math-to-llvm)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(convert-math-to-llvm))" | FileCheck %s // CHECK-LABEL: @ops func.func @ops(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64) { diff --git a/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir b/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir index 3d98dbcf8f29d..5aa844c5afc5e 100644 --- a/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt -pass-pipeline="convert-memref-to-llvm{use-generic-functions=1}" 
-split-input-file %s \ +// RUN: mlir-opt -pass-pipeline="builtin.module(convert-memref-to-llvm{use-generic-functions=1})" -split-input-file %s \ // RUN: | FileCheck %s --check-prefix="CHECK-NOTALIGNED" -// RUN: mlir-opt -pass-pipeline="convert-memref-to-llvm{use-generic-functions=1 use-aligned-alloc=1}" -split-input-file %s \ +// RUN: mlir-opt -pass-pipeline="builtin.module(convert-memref-to-llvm{use-generic-functions=1 use-aligned-alloc=1})" -split-input-file %s \ // RUN: | FileCheck %s --check-prefix="CHECK-ALIGNED" // CHECK-LABEL: func @alloc() diff --git a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir index aaa063d9fc4b0..a058365a104a1 100644 --- a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir +++ b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-THREADS %s -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0})" %s | FileCheck --check-prefix=CHECK-BLOCKS %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1}))" %s | FileCheck --check-prefix=CHECK-THREADS %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0}))" %s | FileCheck --check-prefix=CHECK-BLOCKS %s // CHECK-THREADS-LABEL: @one_d_loop // CHECK-BLOCKS-LABEL: @one_d_loop diff --git a/mlir/test/Conversion/SCFToGPU/step_one.mlir b/mlir/test/Conversion/SCFToGPU/step_one.mlir index c4668bfeba747..be6fadfbd0ad3 100644 --- a/mlir/test/Conversion/SCFToGPU/step_one.mlir +++ b/mlir/test/Conversion/SCFToGPU/step_one.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-11 %s -// 
RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2})" %s | FileCheck --check-prefix=CHECK-22 %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1}))" %s | FileCheck --check-prefix=CHECK-11 %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2}))" %s | FileCheck --check-prefix=CHECK-22 %s // CHECK-11-LABEL: @step_1 // CHECK-22-LABEL: @step_1 diff --git a/mlir/test/Conversion/SCFToGPU/step_positive.mlir b/mlir/test/Conversion/SCFToGPU/step_positive.mlir index 65d16046382d6..97fd7d598621b 100644 --- a/mlir/test/Conversion/SCFToGPU/step_positive.mlir +++ b/mlir/test/Conversion/SCFToGPU/step_positive.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1}))" %s | FileCheck %s // CHECK-LABEL: @step_var func.func @step_var(%A : memref, %B : memref) { diff --git a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir index 0c6aca5e6a5d7..7fa37b869473c 100644 --- a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir +++ b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-shape-constraints)" <%s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-shape-constraints))" <%s | FileCheck %s // There's not very much useful to check here other than pasting the output. 
// CHECK-LABEL: func @cstr_broadcastable( diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir index 811bf28535e55..df22ee18639fc 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg-named)" %s -verify-diagnostics -o -| FileCheck %s +// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named))" %s -verify-diagnostics -o -| FileCheck %s // CHECK-LABEL: @matmul func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir index d1506d0c304c0..70722c893ca42 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg)" %s -o -| FileCheck %s +// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg))" %s -o -| FileCheck %s // CHECK: #map = affine_map<(d0, d1, d2, d3) -> (d0, d3)> // CHECK: #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 6c2626dd1ac00..2aeb7c8607719 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg)" %s -verify-diagnostics -o -| FileCheck %s +// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg))" %s -verify-diagnostics -o -| FileCheck %s // 
CHECK: #[[$MAP0:.*]] = affine_map<() -> ()> diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir index ae6329c22eff7..6e6d384f82105 100644 --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(convert-vector-to-gpu{use-nvgpu=true})" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(convert-vector-to-gpu{use-nvgpu=true}))" | FileCheck %s //######################################################### // INT8 row-row-row @@ -664,4 +664,4 @@ func.func @m16n8k8_tf32_f32_col_col_row(%arg0: memref<20x20xf32, 3>, %arg1: memr // CHECK: vector.store vector.transfer_write %D, %arg2[%c16, %c8] {in_bounds = [true, true]} : vector<16x8xf32>, memref<20x20xf32> return -} \ No newline at end of file +} diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir index 5da76a95dc2a1..afe3d5d229b9c 100644 --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-gpu),canonicalize" | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-gpu),canonicalize)" | FileCheck %s #map0 = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1, d2) -> (d0, d2)> diff --git a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir index 1a91e8b9fbc30..dac8e018f845f 100644 --- a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir +++ b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-pass-pipeline="func.func(convert-vector-to-scf{lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-tensors=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s // CHECK-LABEL: func @transfer_read_2d( // CHECK: %[[ALLOC:.*]] = memref.alloca() : memref> diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir index 4da63a90c5c1a..f8da970b7fc29 100644 --- a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir +++ b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-tensors=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s // CHECK-LABEL: func @transfer_read_2d( // CHECK: %[[V_INIT:.*]] = arith.constant dense<-4.200000e+01> : vector<4x9xf32> diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir index 7cf8cc1b05d41..3817f78f5cdd5 100644 --- a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s // CHECK-LABEL: func @transfer_read_inbounds func.func @transfer_read_inbounds(%A : memref) -> (vector<2x3x4xf32>) { diff --git 
a/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir index d4b5b34a96bad..8468b813e8f25 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true})" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true}))" -split-input-file | FileCheck %s // Ensure that the permutation map is lowered (by inserting a transpose op) // before lowering the vector.transfer_read. diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir index e7b72eb1364a8..0d5678117dfb4 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf)" -split-input-file -allow-unregistered-dialect | FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf))" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL // CHECK-LABEL: func @vector_transfer_ops_0d( func.func @vector_transfer_ops_0d(%M: memref) { diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir index 5f4b2e90705c2..a4118904d0fe3 100644 --- 
a/mlir/test/Dialect/Affine/canonicalize.mlir +++ b/mlir/test/Dialect/Affine/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s // ----- diff --git a/mlir/test/Dialect/Affine/loop-unswitch.mlir b/mlir/test/Dialect/Affine/loop-unswitch.mlir index 19c1eed705c53..5a58941937bf5 100644 --- a/mlir/test/Dialect/Affine/loop-unswitch.mlir +++ b/mlir/test/Dialect/Affine/loop-unswitch.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(test-affine-loop-unswitch)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(test-affine-loop-unswitch))" | FileCheck %s // CHECK-DAG: #[[$SET:.*]] = affine_set<(d0) : (d0 - 2 >= 0)> diff --git a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir index 50cb32076e61e..cce1946b391e7 100644 --- a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir +++ b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-memref-stride-calculation)" -o /dev/null | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-memref-stride-calculation))" -o /dev/null | FileCheck %s func.func @f(%0: index) { // CHECK-LABEL: Testing: f diff --git a/mlir/test/Dialect/ControlFlow/canonicalize.mlir b/mlir/test/Dialect/ControlFlow/canonicalize.mlir index 9ad790af8f499..8cef84549c8f0 100644 --- a/mlir/test/Dialect/ControlFlow/canonicalize.mlir +++ b/mlir/test/Dialect/ControlFlow/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck --dump-input-context 20 %s +// RUN: mlir-opt %s 
-allow-unregistered-dialect -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck --dump-input-context 20 %s /// Test the folding of BranchOp. diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir index 3d603ea38677b..db33f5cf4b5b0 100644 --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(gpu.module(gpu.func(test-gpu-memory-promotion)))' -split-input-file %s | FileCheck %s gpu.module @foo { diff --git a/mlir/test/Dialect/LLVMIR/terminator.mlir b/mlir/test/Dialect/LLVMIR/terminator.mlir index 6c2a2bf00f09e..86b70735d3efa 100644 --- a/mlir/test/Dialect/LLVMIR/terminator.mlir +++ b/mlir/test/Dialect/LLVMIR/terminator.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline='func.func(canonicalize)' %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline='builtin.module(func.func(canonicalize))' %s | FileCheck %s // verify that terminators survive the canonicalizer // CHECK-LABEL: @return diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir index 41cc866060302..a6552e0a5264e 100644 --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-elementwise-to-linalg)" -split-input-file %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-elementwise-to-linalg))" -split-input-file %s | FileCheck %s // In-depth checking of the linalg.generic op for a very trivial case. 
// CHECK: #[[$MAP:.*]] = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir index 5450580dbf1a4..6fa84301be600 100644 --- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s #map = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir index c0cf7abb7d21b..87820d0a90a8a 100644 --- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s // TODO: Detensoring breaks if %arg0 or %arg1 are passed directly as tensors. Fix that. 
func.func @if_true_test(%arg0: i1, %arg1: i32) -> tensor attributes {} { diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir index 1720d6f9ece14..d11c5a5d6ce5f 100644 --- a/mlir/test/Dialect/Linalg/detensorize_if.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir index fa65ae3ec9654..02fa7ace13b9d 100644 --- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s -check-prefix=DET-ALL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s -check-prefix=DET-CF #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir index 7b70053c9266b..e10c46c629aa0 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF +// RUN: mlir-opt %s 
-pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s -check-prefix=DET-ALL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s -check-prefix=DET-CF #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir index a0d3cff344add..5af2ff8ac84e6 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s -check-prefix=DET-ALL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s -check-prefix=DET-CF #map0 = affine_map<() -> ()> #map1 = affine_map<(i) -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir index 59137f949b356..9e0706322ba69 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index 12ecdda129b9f..c96f95a1b517a 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -1,4 +1,4 @@ -// 
RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(linalg-fold-unit-extent-dims)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(linalg-fold-unit-extent-dims))" | FileCheck %s #accesses = [ affine_map<(i, j, k, l, m) -> (i, k, m)>, diff --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir index 3107583640206..3822fe8c39748 100644 --- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir +++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(linalg-fold-unit-extent-dims{fold-one-trip-loops-only})" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(linalg-fold-unit-extent-dims{fold-one-trip-loops-only}))" | FileCheck %s #accesses = [ affine_map<(i, j, k, l, m) -> (i, k, m)>, diff --git a/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir b/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir index f4e855f56b54d..bb1108ea3115a 100644 --- a/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir +++ b/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file --pass-pipeline='func.func(nvgpu-optimize-shared-memory)' | FileCheck %s +// RUN: mlir-opt %s -split-input-file --pass-pipeline='builtin.module(func.func(nvgpu-optimize-shared-memory))' | FileCheck %s // CHECK: @optimize_128x32xf16_32x128xf16([[arg0:%.+]]: memref<{{.*}}>, [[ldRow:%.+]]: index, [[ldCol:%.+]]: index, [[stRow:%.+]]: index, [[stCol:%.+]]: index, [[fragRow:%.+]]: index, [[fragCol:%.+]]: index) func.func @optimize_128x32xf16_32x128xf16(%arg0: memref<128x128xf16>, diff --git a/mlir/test/Dialect/Quant/canonicalize.mlir b/mlir/test/Dialect/Quant/canonicalize.mlir index fca8116d40e76..c67f1290c9d76 100644 --- a/mlir/test/Dialect/Quant/canonicalize.mlir +++ b/mlir/test/Dialect/Quant/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: 
mlir-opt %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s // ----- // CHECK-LABEL: redundant_scast diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 7dde0d7647f30..b6ac36282fc43 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck %s // ----- diff --git a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir index 40a12ac5afb17..d2e14f3e25fa8 100644 --- a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir +++ b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-for-to-while)' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-for-to-while))' -split-input-file | FileCheck %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py // CHECK-LABEL: func @single_loop( diff --git a/mlir/test/Dialect/SCF/loop-range.mlir b/mlir/test/Dialect/SCF/loop-range.mlir index 3494621fb92bc..cd3b4861fc18d 100644 --- a/mlir/test/Dialect/SCF/loop-range.mlir +++ b/mlir/test/Dialect/SCF/loop-range.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-for-loop-range-folding)' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-for-loop-range-folding))' -split-input-file | FileCheck %s func.func @fold_one_loop(%arg0: memref, %arg1: index, %arg2: index) { %c0 = arith.constant 0 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir index 
96befd7d57b99..aab64b2751caf 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-fusion)' -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-fusion))' -split-input-file | FileCheck %s func.func @fuse_empty_loops() { %c2 = arith.constant 2 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir index 75cade55aef3d..7491550c1dc7c 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4 no-min-max-bounds=true})' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4 no-min-max-bounds=true}))' -split-input-file | FileCheck %s func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir index af2b567fb7c3f..897f60b29fdbd 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4}))' -split-input-file | FileCheck %s func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, diff --git 
a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir index 950cfcc2c0884..b13d6443850c9 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s //===----------------------------------------------------------------------===// // spirv.AccessChain diff --git a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir index e0aa98e8d050b..d1937c44262f2 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline='spirv.module(inline{default-pipeline=''})' | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.module(spirv.module(inline{default-pipeline=''}))' | FileCheck %s spirv.module Logical GLSL450 { spirv.func @callee() "None" { diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index c3d9ae24c0891..3f3a35eb52b0d 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file -allow-unregistered-dialect | FileCheck %s // ----- diff --git a/mlir/test/IR/diagnostic-handler-filter.mlir b/mlir/test/IR/diagnostic-handler-filter.mlir index 83ba053065943..39374a919fb95 100644 --- a/mlir/test/IR/diagnostic-handler-filter.mlir +++ b/mlir/test/IR/diagnostic-handler-filter.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-pass-pipeline="func.func(test-diagnostic-filter{filters=mysource1})" -split-input-file -o - 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-diagnostic-filter{filters=mysource1}))" -split-input-file -o - 2>&1 | FileCheck %s // This test verifies that diagnostic handler can emit the call stack successfully. // CHECK-LABEL: Test 'test1' diff --git a/mlir/test/IR/test-clone.mlir b/mlir/test/IR/test-clone.mlir index be8cef05d17ec..7c720288c95b9 100644 --- a/mlir/test/IR/test-clone.mlir +++ b/mlir/test/IR/test-clone.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(test-clone)" -split-input-file +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(test-clone))" -split-input-file module { func.func @fixpoint(%arg1 : i32) -> i32 { diff --git a/mlir/test/IR/test-matchers.mlir b/mlir/test/IR/test-matchers.mlir index 074572a591ab7..87c7bf9e7ebc8 100644 --- a/mlir/test/IR/test-matchers.mlir +++ b/mlir/test/IR/test-matchers.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline="func.func(test-matchers)" -o /dev/null 2>&1 | FileCheck %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline="builtin.module(func.func(test-matchers))" -o /dev/null 2>&1 | FileCheck %s func.func @test1(%a: f32, %b: f32, %c: f32) { %0 = arith.addf %a, %b: f32 diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir index 300e75b519092..f661792b9ab48 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(canonicalize,cse),one-shot-bufferize{bufferize-function-boundaries}" |\ -// RUN: mlir-opt 
-pass-pipeline="func.func(buffer-deallocation,convert-vector-to-scf,lower-affine,convert-linalg-to-loops)" |\ -// RUN: mlir-opt -pass-pipeline="func.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(canonicalize,cse),one-shot-bufferize{bufferize-function-boundaries})" |\ +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(buffer-deallocation,convert-vector-to-scf,lower-affine,convert-linalg-to-loops))" |\ +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext |\ diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir index 41dec2912f5c4..5372280e8a012 100644 --- a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir +++ b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf,memref-expand,arith-expand),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf,memref-expand,arith-expand),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git 
a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir index 3d5165e797ade..973ca5bc95383 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir @@ -1,19 +1,19 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // 
RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir index 17311904d8919..ee079c44f5822 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir @@ -1,19 +1,19 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s 
-pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir index 6bad868bba214..225ff563cbff0 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir @@ -1,19 +1,19 @@ 
-// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true 
lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir index 66c7be56a12ec..ccafa5b36a7c6 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ 
// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir index 2bdd1caf84e6b..929a30a4d4788 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir index 5c54272a93c91..1a3fe7cf9cefb 100644 --- 
a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir @@ -2,7 +2,7 @@ // RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\ // RUN: mlir-opt -lower-affine -convert-scf-to-cf -convert-vector-to-llvm \ // RUN: -convert-arith-to-llvm -gpu-kernel-outlining |\ -// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir index 0740c97d45857..4a26080a654ad 100644 --- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir @@ -3,7 +3,7 @@ // RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ // RUN: -gpu-kernel-outlining |\ -// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ @@ -16,7 +16,7 @@ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | 
\ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ // RUN: -gpu-kernel-outlining |\ -// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ @@ -28,7 +28,7 @@ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ // RUN: -gpu-kernel-outlining |\ -// RUN: mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ // RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir index 06896b5bfa0ed..00fc729ed158d 100644 --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \ // RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | 
mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir index b690d50e2d6fc..cbbde4ac63345 100644 --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \ // RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir index f8461a5dddd1d..0f61f1ebcbd6b 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir index b1f76c75593a0..03948f6ee958e 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt 
-pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir index 41b373d8461aa..5e1127ebcce59 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir index 1727668423acc..92f6a804ece15 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir index ff9b20ab40a03..3b988e29cb39f 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir @@ -1,6 
+1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir index b7dbb95ed6e86..485bdcd5d0a3e 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir index 7c6d494bcf9a6..eac5ecfc5b449 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir index c09ab7a9819a8..ffb11ab27c7f4 100644 --- 
a/mlir/test/Integration/GPU/CUDA/async.mlir +++ b/mlir/test/Integration/GPU/CUDA/async.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-async-region -gpu-to-llvm \ // RUN: | mlir-opt -async-to-async-runtime -async-runtime-ref-counting \ // RUN: | mlir-opt -convert-async-to-llvm -convert-func-to-llvm \ diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir index e1ad2f61185b1..b52d688bb75f6 100644 --- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir +++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir index bb2c8439a1db2..c980f39c3dbb5 100644 --- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff 
--git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir index a72ee8b9214e5..be59529b27202 100644 --- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir index b30f073307d8b..446be5709f36a 100644 --- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir index 1cc7db41260f6..be27a0194fd43 100644 --- a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir +++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip}))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | 
mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir index 29c3c63c91292..5aa51b53431cb 100644 --- a/mlir/test/Integration/GPU/ROCM/printf.mlir +++ b/mlir/test/Integration/GPU/ROCM/printf.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP},gpu-to-hsaco{chip=%chip}))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir index 196b412fe088e..92ff32ed18b72 100644 --- a/mlir/test/Integration/GPU/ROCM/two-modules.mlir +++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip}))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir index 9041441e10958..ffb7de9030bff 100644 --- a/mlir/test/Integration/GPU/ROCM/vecadd.mlir +++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -convert-scf-to-cf \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt 
-pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip}))' \ // RUN: | mlir-opt -gpu-to-llvm=use-bare-pointers-for-kernels=true \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir index ca7da5d730e68..af3b1e9b8c3db 100644 --- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir +++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -convert-scf-to-cf \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip}))' \ // RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ diff --git a/mlir/test/Pass/crash-recovery-dynamic-failure.mlir b/mlir/test/Pass/crash-recovery-dynamic-failure.mlir index 9901f7f2474e2..bcf49ad978996 100644 --- a/mlir/test/Pass/crash-recovery-dynamic-failure.mlir +++ b/mlir/test/Pass/crash-recovery-dynamic-failure.mlir @@ -1,5 +1,5 @@ // Check that local reproducers will also traverse dynamic pass pipelines. 
-// RUN: mlir-opt %s -pass-pipeline='test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-failure}' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading +// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-failure})' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL_DYNAMIC_FAILURE %s // The crash recovery mechanism will leak memory allocated in the crashing thread. diff --git a/mlir/test/Pass/crash-recovery.mlir b/mlir/test/Pass/crash-recovery.mlir index 91030a4bdfd52..cb586d8b5dcbc 100644 --- a/mlir/test/Pass/crash-recovery.mlir +++ b/mlir/test/Pass/crash-recovery.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass, test-pass-crash)' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass, test-pass-crash))' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics // RUN: cat %t | FileCheck -check-prefix=REPRO %s -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass, test-pass-crash)' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer -mlir-disable-threading +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass, test-pass-crash))' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer -mlir-disable-threading // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL %s // Check that we correctly handle verifiers passes with local reproducer, this used to crash. 
@@ -8,7 +8,7 @@ // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL %s // Check that local reproducers will also traverse dynamic pass pipelines. -// RUN: mlir-opt %s -pass-pipeline='test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-crash}' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading +// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-crash})' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL_DYNAMIC %s // The crash recovery mechanism will leak memory allocated in the crashing thread. diff --git a/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir b/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir index 80219503da20b..c429ad3afb052 100644 --- a/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir +++ b/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-parent=1 dynamic-pipeline=test-patterns})' -split-input-file -verify-diagnostics +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-parent=1 dynamic-pipeline=test-patterns}))' -split-input-file -verify-diagnostics // Verify that we fail to schedule a dynamic pipeline on the parent operation. 
diff --git a/mlir/test/Pass/dynamic-pipeline-nested.mlir b/mlir/test/Pass/dynamic-pipeline-nested.mlir index 35e909d74dd34..ac2fdd3265b63 100644 --- a/mlir/test/Pass/dynamic-pipeline-nested.mlir +++ b/mlir/test/Pass/dynamic-pipeline-nested.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1 dynamic-pipeline=cse})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NOTNESTED --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=cse})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NESTED --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1 dynamic-pipeline=cse}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NOTNESTED --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=cse}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NESTED --check-prefix=CHECK // Verify that we can schedule a dynamic pipeline on a nested operation diff --git a/mlir/test/Pass/dynamic-pipeline.mlir b/mlir/test/Pass/dynamic-pipeline.mlir index f037ae8044f82..5e31ba476aeb0 100644 --- a/mlir/test/Pass/dynamic-pipeline.mlir +++ b/mlir/test/Pass/dynamic-pipeline.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1, dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD1-ONLY --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod2, 
dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD2 --check-prefix=MOD2-ONLY --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1,inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1, dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD1-ONLY --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD2 --check-prefix=MOD2-ONLY --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1,inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK func.func @f() { diff --git a/mlir/test/Pass/generic-pipeline.mlir b/mlir/test/Pass/generic-pipeline.mlir index 
00c6c767c7707..dfd17d59dd270 100644 --- a/mlir/test/Pass/generic-pipeline.mlir +++ b/mlir/test/Pass/generic-pipeline.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='any(cse, test-interface-pass)' -allow-unregistered-dialect -o /dev/null +// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='builtin.module(any(cse, test-interface-pass))' -allow-unregistered-dialect -o /dev/null // Test that we execute generic pipelines correctly. The `cse` pass is fully generic and should execute // on both the module and the func. The `test-interface-pass` filters based on FunctionOpInterface and diff --git a/mlir/test/Pass/interface-pass.mlir b/mlir/test/Pass/interface-pass.mlir index e07237b5e8ff5..d02e8937f43aa 100644 --- a/mlir/test/Pass/interface-pass.mlir +++ b/mlir/test/Pass/interface-pass.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='func.func(test-interface-pass)' -o /dev/null +// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='builtin.module(func.func(test-interface-pass))' -o /dev/null // Test that we run the interface pass on the function. diff --git a/mlir/test/Pass/invalid-interface-pass.mlir b/mlir/test/Pass/invalid-interface-pass.mlir index eb36958b8b834..8f58f39200308 100644 --- a/mlir/test/Pass/invalid-interface-pass.mlir +++ b/mlir/test/Pass/invalid-interface-pass.mlir @@ -1,4 +1,4 @@ -// RUN: not mlir-opt %s -pass-pipeline='test-interface-pass' 2>&1 | FileCheck %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-interface-pass)' 2>&1 | FileCheck %s // Test that we emit an error when an interface pass is added to a pass manager it can't be scheduled on. 
diff --git a/mlir/test/Pass/invalid-ir-print-after-failure.mlir b/mlir/test/Pass/invalid-ir-print-after-failure.mlir index 012d999e23055..6910abe20be92 100644 --- a/mlir/test/Pass/invalid-ir-print-after-failure.mlir +++ b/mlir/test/Pass/invalid-ir-print-after-failure.mlir @@ -1,11 +1,11 @@ -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=true})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM -// RUN: mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=false})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=true}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=false}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM 
// Check that `-mlir-print-assume-verified` will print custom even when the IR is invalid. -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true})' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false})' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true}))' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false}))' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM // Test whether we print generically or not on pass failure, depending on whether there is invalid IR or not. diff --git a/mlir/test/Pass/invalid-parent.mlir b/mlir/test/Pass/invalid-parent.mlir index 1e2865dad1bf7..c6f54c43516fa 100644 --- a/mlir/test/Pass/invalid-parent.mlir +++ b/mlir/test/Pass/invalid-parent.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(test-pass-invalid-parent)' -verify-diagnostics +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-invalid-parent))' -verify-diagnostics // Test that we properly report errors when the parent becomes invalid after running a pass // on a child operation. 
diff --git a/mlir/test/Pass/invalid-pass.mlir b/mlir/test/Pass/invalid-pass.mlir index 5a8b8386ce150..c9e37cc4984af 100644 --- a/mlir/test/Pass/invalid-pass.mlir +++ b/mlir/test/Pass/invalid-pass.mlir @@ -1,4 +1,4 @@ -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{test-option=a})' 2>&1 | FileCheck %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass{test-option=a}))' 2>&1 | FileCheck %s // CHECK: : no such option test-option // CHECK: failed to add `test-module-pass` with options `test-option=a` diff --git a/mlir/test/Pass/ir-printing.mlir b/mlir/test/Pass/ir-printing.mlir index bd506c2c7ebda..048b721ba6d53 100644 --- a/mlir/test/Pass/ir-printing.mlir +++ b/mlir/test/Pass/ir-printing.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-before=cse -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-before=cse -mlir-print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,cse)' -mlir-print-ir-after-all -mlir-print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s -// RUN: not mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,test-pass-failure)' 
-mlir-print-ir-after-failure -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_FAILURE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-before=cse -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-before=cse -mlir-print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,cse))' -mlir-print-ir-after-all -mlir-print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s +// RUN: not mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,test-pass-failure))' -mlir-print-ir-after-failure -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_FAILURE %s func.func @foo() { %0 = arith.constant 0 : i32 diff --git a/mlir/test/Pass/pass-timing.mlir b/mlir/test/Pass/pass-timing.mlir index 63cfa3702bdbc..bd5d611b47154 100644 --- a/mlir/test/Pass/pass-timing.mlir +++ b/mlir/test/Pass/pass-timing.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s -// RUN: mlir-opt %s 
-mlir-disable-threading=true -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s -// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s -// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s // RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=false -test-pm-nested-pipeline -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=NESTED_PIPELINE %s // LIST: Execution time report diff --git a/mlir/test/Pass/pipeline-options-parsing.mlir b/mlir/test/Pass/pipeline-options-parsing.mlir index 436dfce4d1d96..33bef75ee94a2 100644 --- a/mlir/test/Pass/pipeline-options-parsing.mlir +++ b/mlir/test/Pass/pipeline-options-parsing.mlir @@ -1,11 +1,11 @@ -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{)' 
2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{test-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-options-pass{list=3}), test-module-pass{invalid-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s -// RUN: not mlir-opt %s -pass-pipeline='test-options-pass{list=3 list=notaninteger}' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s -// RUN: mlir-opt %s -pass-pipeline='func.func(test-options-pass{list=1,2,3,4 list=5 string=value1 string=value2})' -// RUN: mlir-opt %s -verify-each=false -pass-pipeline='func.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=nested_pipeline{arg1=10 arg2=" {} " arg3=true}})' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass{))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass{test-option=3}))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(func.func(test-options-pass{list=3}), test-module-pass{invalid-option=3}))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-options-pass{list=3 list=notaninteger})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-options-pass{list=1,2,3,4 list=5 string=value1 string=value2}))' +// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(func.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=nested_pipeline{arg1=10 arg2=" {} " arg3=true}}))' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s // RUN: mlir-opt %s -verify-each=false 
-test-options-pass-pipeline='list=1 string-list=a,b' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_2 %s -// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(func.func(test-options-pass{list=3}), func.func(test-options-pass{list=1,2,3,4}))' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_3 %s +// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(builtin.module(func.func(test-options-pass{list=3}), func.func(test-options-pass{list=1,2,3,4})))' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_3 %s // CHECK_ERROR_1: missing closing '}' while processing pass options // CHECK_ERROR_2: no such option test-option diff --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir index 3e7ce7cb68020..6291dd647391b 100644 --- a/mlir/test/Pass/pipeline-parsing.mlir +++ b/mlir/test/Pass/pipeline-parsing.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass),func.func(cse,canonicalize)' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s +// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass),func.func(cse,canonicalize))' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s // RUN: mlir-opt %s -mlir-disable-threading -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK -// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass),any(test-interface-pass),any(test-interface-pass),func.func(test-function-pass),any(canonicalize),func.func(cse)' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=GENERIC_MERGE_CHECK -// RUN: not mlir-opt %s 
-pass-pipeline='builtin.module(test-module-pass' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s +// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(builtin.module(test-module-pass),any(test-interface-pass),any(test-interface-pass),func.func(test-function-pass),any(canonicalize),func.func(cse))' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=GENERIC_MERGE_CHECK +// RUN: not mlir-opt %s -pass-pipeline='any(builtin.module(test-module-pass)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s -// RUN: not mlir-opt %s -pass-pipeline='builtin.module()(' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s -// RUN: not mlir-opt %s -pass-pipeline=',' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s +// RUN: not mlir-opt %s -pass-pipeline='any(builtin.module()()' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s +// RUN: not mlir-opt %s -pass-pipeline='any(,)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s // RUN: not mlir-opt %s -pass-pipeline='func.func(test-module-pass)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_5 %s // CHECK_ERROR_1: encountered unbalanced parentheses while parsing pipeline @@ -16,6 +16,12 @@ // RUN: not mlir-opt %s -pass-pipeline='' -cse 2>&1 | FileCheck --check-prefix=CHECK_ERROR_6 %s // CHECK_ERROR_6: '-pass-pipeline' option can't be used with individual pass options +// RUN: not mlir-opt %s -pass-pipeline='wrong-op()' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_7 %s +// CHECK_ERROR_7: can't run 'wrong-op' pass manager on 'builtin.module' op + +// RUN: mlir-opt %s -pass-pipeline='any(cse)' -dump-pass-pipeline 2>&1 | FileCheck %s -check-prefix=CHECK_ROUNDTRIP +// CHECK_ROUNDTRIP: any(cse) + func.func @foo() { return } diff --git a/mlir/test/Pass/pipeline-stats.mlir b/mlir/test/Pass/pipeline-stats.mlir index e71cb69fa99af..0f8b02c1057ea 100644 --- 
a/mlir/test/Pass/pipeline-stats.mlir +++ b/mlir/test/Pass/pipeline-stats.mlir @@ -1,6 +1,6 @@ // REQUIRES: asserts -// RUN: mlir-opt %s -verify-each=true -pass-pipeline='func.func(test-stats-pass,test-stats-pass)' -mlir-pass-statistics -mlir-pass-statistics-display=list 2>&1 | FileCheck -check-prefix=LIST %s -// RUN: mlir-opt %s -verify-each=true -pass-pipeline='func.func(test-stats-pass,test-stats-pass)' -mlir-pass-statistics -mlir-pass-statistics-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s +// RUN: mlir-opt %s -verify-each=true -pass-pipeline='builtin.module(func.func(test-stats-pass,test-stats-pass))' -mlir-pass-statistics -mlir-pass-statistics-display=list 2>&1 | FileCheck -check-prefix=LIST %s +// RUN: mlir-opt %s -verify-each=true -pass-pipeline='builtin.module(func.func(test-stats-pass,test-stats-pass))' -mlir-pass-statistics -mlir-pass-statistics-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s // LIST: Pass statistics report // LIST: TestStatisticPass diff --git a/mlir/test/Target/LLVMIR/arm-neon-2d.mlir b/mlir/test/Target/LLVMIR/arm-neon-2d.mlir index 18d10c0497974..dfb4e4fd309a9 100644 --- a/mlir/test/Target/LLVMIR/arm-neon-2d.mlir +++ b/mlir/test/Target/LLVMIR/arm-neon-2d.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(arm-neon-2d-to-intr)" %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(arm-neon-2d-to-intr))" %s | FileCheck %s // CHECK-LABEL: arm_neon_sdot2d_4x4_i8i8 func.func @arm_neon_sdot2d_4x4_i8i8(%a: vector<4xi32>, %b: vector<4x4xi8>, %c: vector<4x4xi8>) -> vector<4xi32> { diff --git a/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir b/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir index 924842d6388dc..9f3eeb569060c 100644 --- a/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir +++ b/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-pass-pipeline="convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | mlir-translate -mlir-to-llvmir | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-translate -mlir-to-llvmir | FileCheck %s func.func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1> diff --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir index 3a19cac5d3070..d33c911e042d7 100644 --- a/mlir/test/Transforms/canonicalize-block-merge.mlir +++ b/mlir/test/Transforms/canonicalize-block-merge.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck %s // Check the simple case of single operation blocks with a return. diff --git a/mlir/test/Transforms/canonicalize-dce.mlir b/mlir/test/Transforms/canonicalize-dce.mlir index d118768764376..46545d2e9fd51 100644 --- a/mlir/test/Transforms/canonicalize-dce.mlir +++ b/mlir/test/Transforms/canonicalize-dce.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s // Test case: Simple case of deleting a dead pure op. 
diff --git a/mlir/test/Transforms/canonicalize-td.mlir b/mlir/test/Transforms/canonicalize-td.mlir index 549b302534914..46fc4d085ba56 100644 --- a/mlir/test/Transforms/canonicalize-td.mlir +++ b/mlir/test/Transforms/canonicalize-td.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize{top-down=true})' | FileCheck %s --check-prefix=TD -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s --check-prefix=BU +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize{top-down=true}))' | FileCheck %s --check-prefix=TD +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s --check-prefix=BU // BU-LABEL: func @default_insertion_position diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 20538cd6262d3..df1555db666bb 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck %s // CHECK-LABEL: func @test_subi_zero func.func @test_subi_zero(%arg0: i32) -> i32 { diff --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir index 7a8de218bdd82..dbc2d5efb36ad 100644 --- a/mlir/test/Transforms/cse.mlir +++ b/mlir/test/Transforms/cse.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(cse)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(cse))' | FileCheck %s // CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 mod 2)> #map0 = affine_map<(d0) -> (d0 mod 2)> diff --git 
a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir index 224ed2d2f39de..90c3f5d28d6af 100644 --- a/mlir/test/Transforms/parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s // CHECK-LABEL: func @parallel_many_dims() { func.func @parallel_many_dims() { diff --git a/mlir/test/Transforms/parametric-mapping.mlir b/mlir/test/Transforms/parametric-mapping.mlir index a2e7a7718285b..b6ef0088d868b 100644 --- a/mlir/test/Transforms/parametric-mapping.mlir +++ b/mlir/test/Transforms/parametric-mapping.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline="func.func(test-mapping-to-processing-elements)" %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(test-mapping-to-processing-elements))" %s | FileCheck %s // CHECK: #[[mul_map:.+]] = affine_map<()[s0, s1] -> (s0 * s1)> // CHECK: #[[add_map:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> diff --git a/mlir/test/Transforms/sccp-callgraph.mlir b/mlir/test/Transforms/sccp-callgraph.mlir index 3ed1c3c131358..f31f749b6e75a 100644 --- a/mlir/test/Transforms/sccp-callgraph.mlir +++ b/mlir/test/Transforms/sccp-callgraph.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED -// RUN: mlir-opt -allow-unregistered-dialect %s 
-pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s --check-prefix=FUNC +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(builtin.module(sccp))" -split-input-file | FileCheck %s --check-prefix=NESTED +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(sccp))" -split-input-file | FileCheck %s --check-prefix=FUNC /// Check that a constant is properly propagated through the arguments and /// results of a private function. diff --git a/mlir/test/Transforms/sccp-structured.mlir b/mlir/test/Transforms/sccp-structured.mlir index 529d41554a473..f17f5ff51cf3f 100644 --- a/mlir/test/Transforms/sccp-structured.mlir +++ b/mlir/test/Transforms/sccp-structured.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(sccp))" -split-input-file | FileCheck %s /// Check that a constant is properly propagated when only one edge is taken. diff --git a/mlir/test/Transforms/sccp.mlir b/mlir/test/Transforms/sccp.mlir index d4ca3a2b492a3..db24432b65cc6 100644 --- a/mlir/test/Transforms/sccp.mlir +++ b/mlir/test/Transforms/sccp.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(sccp))" -split-input-file | FileCheck %s /// Check simple forward constant propagation without any control flow. 
diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir index 5645f8b779183..91cab126d8dd6 100644 --- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s func.func @collapse_to_single() { %c0 = arith.constant 3 : index diff --git a/mlir/test/Transforms/test-canonicalize-filter.mlir b/mlir/test/Transforms/test-canonicalize-filter.mlir index 5e3738e531b0a..dba5f05e84345 100644 --- a/mlir/test/Transforms/test-canonicalize-filter.mlir +++ b/mlir/test/Transforms/test-canonicalize-filter.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s --check-prefix=NO_FILTER -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{enable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_ENABLE -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{disable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_DISABLE +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s --check-prefix=NO_FILTER +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize{enable-patterns=TestRemoveOpWithInnerOps}))' | FileCheck %s --check-prefix=FILTER_ENABLE +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize{disable-patterns=TestRemoveOpWithInnerOps}))' | FileCheck %s --check-prefix=FILTER_DISABLE // NO_FILTER-LABEL: func @remove_op_with_inner_ops_pattern // NO_FILTER-NEXT: return diff --git a/mlir/test/Transforms/test-canonicalize.mlir 
b/mlir/test/Transforms/test-canonicalize.mlir index 2181d1856d3aa..bc463fefe6534 100644 --- a/mlir/test/Transforms/test-canonicalize.mlir +++ b/mlir/test/Transforms/test-canonicalize.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{region-simplify=false})' | FileCheck %s --check-prefixes=CHECK,NO-RS +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize{region-simplify=false}))' | FileCheck %s --check-prefixes=CHECK,NO-RS // CHECK-LABEL: func @remove_op_with_inner_ops_pattern func.func @remove_op_with_inner_ops_pattern() { diff --git a/mlir/test/Transforms/test-operation-folder-commutative.mlir b/mlir/test/Transforms/test-operation-folder-commutative.mlir index ea8f9b3f42928..89896e3bf99a8 100644 --- a/mlir/test/Transforms/test-operation-folder-commutative.mlir +++ b/mlir/test/Transforms/test-operation-folder-commutative.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --pass-pipeline="func.func(test-patterns)" %s | FileCheck %s +// RUN: mlir-opt --pass-pipeline="builtin.module(func.func(test-patterns))" %s | FileCheck %s // CHECK-LABEL: func @test_reorder_constants_and_match func.func @test_reorder_constants_and_match(%arg0 : i32) -> (i32) { diff --git a/mlir/test/Transforms/test-symbol-dce.mlir b/mlir/test/Transforms/test-symbol-dce.mlir index fa6da2202fc3d..7bd784928e6f3 100644 --- a/mlir/test/Transforms/test-symbol-dce.mlir +++ b/mlir/test/Transforms/test-symbol-dce.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -symbol-dce -split-input-file -verify-diagnostics | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(symbol-dce)" -split-input-file | FileCheck %s --check-prefix=NESTED +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(builtin.module(symbol-dce))" 
-split-input-file | FileCheck %s --check-prefix=NESTED // Check that trivially dead and trivially live non-nested cases are handled. diff --git a/mlir/test/mlir-cpu-runner/async-error.mlir b/mlir/test/mlir-cpu-runner/async-error.mlir index 321f243a3debe..a1ca96fae105e 100644 --- a/mlir/test/mlir-cpu-runner/async-error.mlir +++ b/mlir/test/mlir-cpu-runner/async-error.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/async-group.mlir b/mlir/test/mlir-cpu-runner/async-group.mlir index ef6c69ee19e2f..08c546b06bc81 100644 --- a/mlir/test/mlir-cpu-runner/async-group.mlir +++ b/mlir/test/mlir-cpu-runner/async-group.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e 
main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/async-value.mlir b/mlir/test/mlir-cpu-runner/async-value.mlir index 73627fe314e2f..cafdc9f57a7aa 100644 --- a/mlir/test/mlir-cpu-runner/async-value.mlir +++ b/mlir/test/mlir-cpu-runner/async-value.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir index 272fa04904858..85e260f504f91 100644 --- a/mlir/test/mlir-cpu-runner/async.mlir +++ b/mlir/test/mlir-cpu-runner/async.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" 
\ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir index 55a4ba6a19490..3720402f8caec 100644 --- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts)" | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s // Verify bare pointer memref calling convention. 
`simple_add1_add2_test` // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second diff --git a/mlir/test/mlir-cpu-runner/copy.mlir b/mlir/test/mlir-cpu-runner/copy.mlir index 2c62bf5324c88..d1769dccef3ef 100644 --- a/mlir/test/mlir-cpu-runner/copy.mlir +++ b/mlir/test/mlir-cpu-runner/copy.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/global-memref.mlir b/mlir/test/mlir-cpu-runner/global-memref.mlir index 7ebd24177ca1c..937d8d3afb248 100644 --- a/mlir/test/mlir-cpu-runner/global-memref.mlir +++ b/mlir/test/mlir-cpu-runner/global-memref.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s func.func private @printMemrefF32(memref<*xf32>) attributes { llvm.emit_c_interface } func.func private @printMemrefI32(memref<*xi32>) attributes { llvm.emit_c_interface } diff --git 
a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir index cbaeaed54cdaa..18c1b78e1c417 100644 --- a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir +++ b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir index 4895e303a2d30..134a1293d28cd 100644 --- a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir +++ b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf),convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf),convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/memref-reshape.mlir b/mlir/test/mlir-cpu-runner/memref-reshape.mlir index 05eb536decd79..47ee21a7784aa 100644 --- a/mlir/test/mlir-cpu-runner/memref-reshape.mlir +++ b/mlir/test/mlir-cpu-runner/memref-reshape.mlir @@ -1,4 
+1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,memref-expand,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf,memref-expand,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/print.mlir b/mlir/test/mlir-cpu-runner/print.mlir index dec09521d3f81..039b32d64c727 100644 --- a/mlir/test/mlir-cpu-runner/print.mlir +++ b/mlir/test/mlir-cpu-runner/print.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir index da40d5fdb5165..5ebafbe668020 100644 --- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir +++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s +// RUN: mlir-opt 
-pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s func.func @main() { %A = memref.alloc() : memref<16x16xf32> diff --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir index 6f552c96bfe8a..f8a37f12c051c 100644 --- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir +++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir index 3a9cf9332bf60..565495b802f2b 100644 --- a/mlir/test/mlir-cpu-runner/utils.mlir +++ b/mlir/test/mlir-cpu-runner/utils.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D -// RUN: mlir-opt %s 
-pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e print_1d -entry-point-result=void 
-shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D func.func @print_0d() { %f = arith.constant 2.00000e+00 : f32 diff --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir index 05f13f0710780..d5eafd1c6aeff 100644 --- a/mlir/test/mlir-opt/async.mlir +++ b/mlir/test/mlir-opt/async.mlir @@ -1,6 +1,6 @@ // Check if mlir marks the corresponding function with required coroutine attribute. 
// -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | FileCheck %s // CHECK: llvm.func @async_execute_fn{{.*}}attributes{{.*}}presplitcoroutine From d7f0f4a0a23e4056dd2545b84e294839893abdb2 Mon Sep 17 00:00:00 2001 From: Jan Sjodin Date: Thu, 3 Nov 2022 11:41:21 -0400 Subject: [PATCH 155/516] [OpenMP][OMPIRBuilder] Migrate createOffloadEntriesAndInfoMetadata from clang to OpenMPIRBuilder Remove unused [this] capture in lambda. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 89629583dc5c2..96ea0eb21882b 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4748,7 +4748,7 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( // Create the offloading info metadata node. NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); auto &&TargetRegionMetadataEmitter = - [this, &C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( const TargetRegionEntryInfo &EntryInfo, const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &E) { // Generate metadata for target regions. 
Each entry of this metadata From d511a5d47166b024a4ce5e4202bf83400acef05c Mon Sep 17 00:00:00 2001 From: rkayaith Date: Tue, 20 Sep 2022 20:20:44 -0400 Subject: [PATCH 156/516] [mlir] Include anchor op in reproducer pipeline string Including the anchor op ensures that all pass manager settings are fully specified, and makes the string consistent with the printed form. Depends on D134622 Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D134623 --- mlir/docs/PassManagement.md | 4 ++-- mlir/lib/Pass/PassCrashRecovery.cpp | 16 +++++++++++----- .../Pass/crash-recovery-dynamic-failure.mlir | 2 +- mlir/test/Pass/crash-recovery.mlir | 6 +++--- mlir/test/Pass/run-reproducer.mlir | 5 ++++- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index ed225a84afb7c..9842cb9214461 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -1328,7 +1328,7 @@ module { {-# external_resources: { mlir_reproducer: { - pipeline: "func.func(cse,canonicalize),inline", + pipeline: "builtin.module(func.func(cse,canonicalize),inline)", disable_threading: true, verify_each: true } @@ -1371,7 +1371,7 @@ module { {-# external_resources: { mlir_reproducer: { - pipeline: "func.func(canonicalize)", + pipeline: "builtin.module(func.func(canonicalize))", disable_threading: true, verify_each: true } diff --git a/mlir/lib/Pass/PassCrashRecovery.cpp b/mlir/lib/Pass/PassCrashRecovery.cpp index 98ea35cf02c3b..a98a1f1f6e475 100644 --- a/mlir/lib/Pass/PassCrashRecovery.cpp +++ b/mlir/lib/Pass/PassCrashRecovery.cpp @@ -60,7 +60,7 @@ struct RecoveryReproducerContext { static void registerSignalHandler(); /// The textual description of the currently executing pipeline. - std::string pipeline; + std::string pipelineElements; /// The MLIR operation representing the IR before the crash. 
Operation *preCrashOperation; @@ -93,8 +93,8 @@ llvm::ManagedStatic> RecoveryReproducerContext::RecoveryReproducerContext( std::string passPipelineStr, Operation *op, PassManager::ReproducerStreamFactory &streamFactory, bool verifyPasses) - : pipeline(std::move(passPipelineStr)), preCrashOperation(op->clone()), - streamFactory(streamFactory), + : pipelineElements(std::move(passPipelineStr)), + preCrashOperation(op->clone()), streamFactory(streamFactory), disableThreads(!op->getContext()->isMultithreadingEnabled()), verifyPasses(verifyPasses) { enable(); @@ -118,6 +118,9 @@ void RecoveryReproducerContext::generate(std::string &description) { } descOS << "reproducer generated at `" << stream->description() << "`"; + std::string pipeline = (preCrashOperation->getName().getStringRef() + "(" + + pipelineElements + ")") + .str(); AsmState state(preCrashOperation); state.attachResourcePrinter( "mlir_reproducer", [&](Operation *op, AsmResourceBuilder &builder) { @@ -470,9 +473,12 @@ void PassReproducerOptions::attachResourceParser(ParserConfig &config) { } LogicalResult PassReproducerOptions::apply(PassManager &pm) const { - if (pipeline.has_value()) - if (failed(parsePassPipeline(*pipeline, pm))) + if (pipeline.has_value()) { + FailureOr reproPm = parsePassPipeline(*pipeline); + if (failed(reproPm)) return failure(); + static_cast(pm) = std::move(*reproPm); + } if (disableThreading.has_value()) pm.getContext()->disableMultithreading(*disableThreading); diff --git a/mlir/test/Pass/crash-recovery-dynamic-failure.mlir b/mlir/test/Pass/crash-recovery-dynamic-failure.mlir index bcf49ad978996..69e087d5aa83a 100644 --- a/mlir/test/Pass/crash-recovery-dynamic-failure.mlir +++ b/mlir/test/Pass/crash-recovery-dynamic-failure.mlir @@ -15,4 +15,4 @@ module @inner_mod1 { // REPRO_LOCAL_DYNAMIC_FAILURE: module @inner_mod1 // REPRO_LOCAL_DYNAMIC_FAILURE: module @foo { -// REPRO_LOCAL_DYNAMIC_FAILURE: pipeline: "builtin.module(test-pass-failure)" +// REPRO_LOCAL_DYNAMIC_FAILURE: 
pipeline: "builtin.module(builtin.module(test-pass-failure))" diff --git a/mlir/test/Pass/crash-recovery.mlir b/mlir/test/Pass/crash-recovery.mlir index cb586d8b5dcbc..e636064d26e34 100644 --- a/mlir/test/Pass/crash-recovery.mlir +++ b/mlir/test/Pass/crash-recovery.mlir @@ -22,12 +22,12 @@ module @inner_mod1 { // REPRO: module @inner_mod1 // REPRO: module @foo { -// REPRO: pipeline: "builtin.module(test-module-pass,test-pass-crash)" +// REPRO: pipeline: "builtin.module(builtin.module(test-module-pass,test-pass-crash))" // REPRO_LOCAL: module @inner_mod1 // REPRO_LOCAL: module @foo { -// REPRO_LOCAL: pipeline: "builtin.module(test-pass-crash)" +// REPRO_LOCAL: pipeline: "builtin.module(builtin.module(test-pass-crash))" // REPRO_LOCAL_DYNAMIC: module @inner_mod1 // REPRO_LOCAL_DYNAMIC: module @foo { -// REPRO_LOCAL_DYNAMIC: pipeline: "builtin.module(test-pass-crash)" +// REPRO_LOCAL_DYNAMIC: pipeline: "builtin.module(builtin.module(test-pass-crash))" diff --git a/mlir/test/Pass/run-reproducer.mlir b/mlir/test/Pass/run-reproducer.mlir index 6627033d8be7d..496471d032a52 100644 --- a/mlir/test/Pass/run-reproducer.mlir +++ b/mlir/test/Pass/run-reproducer.mlir @@ -1,3 +1,4 @@ +// RUN: mlir-opt %s -dump-pass-pipeline 2>&1 | FileCheck %s // RUN: mlir-opt %s -mlir-print-ir-before=cse 2>&1 | FileCheck -check-prefix=BEFORE %s func.func @foo() { @@ -12,7 +13,9 @@ func.func @bar() { {-# external_resources: { mlir_reproducer: { - pipeline: "func.func(cse,canonicalize)", + verify_each: true, + // CHECK: builtin.module(func.func(cse,canonicalize{ max-iterations=1 region-simplify=false top-down=false})) + pipeline: "builtin.module(func.func(cse,canonicalize{max-iterations=1 region-simplify=false top-down=false}))", disable_threading: true } } From 215eba4e1ea240dd9223c14b80da664f0bb930cc Mon Sep 17 00:00:00 2001 From: rkayaith Date: Wed, 19 Oct 2022 22:37:12 -0400 Subject: [PATCH 157/516] [mlir][CAPI] Include anchor op in mlirParsePassPipeline The pipeline string must now include 
the pass manager's anchor op. This makes the parse API properly roundtrip the printed form of a pass manager. Since this is already an API break, I also added an extra callback argument which is used for reporting errors. The old functionality of appending to an existing pass manager is available through `mlirOpPassManagerAddPipeline`. Reviewed By: mehdi_amini, ftynse Differential Revision: https://reviews.llvm.org/D136403 --- mlir/include/mlir-c/Pass.h | 8 +++++--- mlir/lib/CAPI/IR/Pass.cpp | 12 ++++++++---- mlir/test/CAPI/pass.c | 23 +++++++++++++++++------ 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/mlir/include/mlir-c/Pass.h b/mlir/include/mlir-c/Pass.h index 704121a0cb096..721f1f28fe916 100644 --- a/mlir/include/mlir-c/Pass.h +++ b/mlir/include/mlir-c/Pass.h @@ -123,10 +123,12 @@ MLIR_CAPI_EXPORTED void mlirPrintPassPipeline(MlirOpPassManager passManager, MlirStringCallback callback, void *userData); -/// Parse a textual MLIR pass pipeline and add it to the provided OpPassManager. - +/// Parse a textual MLIR pass pipeline and assign it to the provided +/// OpPassManager. If parsing fails an error message is reported using the +/// provided callback. MLIR_CAPI_EXPORTED MlirLogicalResult -mlirParsePassPipeline(MlirOpPassManager passManager, MlirStringRef pipeline); +mlirParsePassPipeline(MlirOpPassManager passManager, MlirStringRef pipeline, + MlirStringCallback callback, void *userData); //===----------------------------------------------------------------------===// // External Pass API. 
diff --git a/mlir/lib/CAPI/IR/Pass.cpp b/mlir/lib/CAPI/IR/Pass.cpp index 30f5804876940..4afc668592bd8 100644 --- a/mlir/lib/CAPI/IR/Pass.cpp +++ b/mlir/lib/CAPI/IR/Pass.cpp @@ -86,10 +86,14 @@ void mlirPrintPassPipeline(MlirOpPassManager passManager, } MlirLogicalResult mlirParsePassPipeline(MlirOpPassManager passManager, - MlirStringRef pipeline) { - // TODO: errors are sent to std::errs() at the moment, we should pass in a - // stream and redirect to a diagnostic. - return wrap(mlir::parsePassPipeline(unwrap(pipeline), *unwrap(passManager))); + MlirStringRef pipeline, + MlirStringCallback callback, + void *userData) { + detail::CallbackOstream stream(callback, userData); + FailureOr pm = parsePassPipeline(unwrap(pipeline), stream); + if (succeeded(pm)) + *unwrap(passManager) = std::move(*pm); + return wrap(pm); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/CAPI/pass.c b/mlir/test/CAPI/pass.c index 5b04d749b1cdc..87430b9e47978 100644 --- a/mlir/test/CAPI/pass.c +++ b/mlir/test/CAPI/pass.c @@ -182,7 +182,8 @@ void testParsePassPipeline() { MlirLogicalResult status = mlirParsePassPipeline( mlirPassManagerGetAsOpPassManager(pm), mlirStringRefCreateFromCString( - "builtin.module(func.func(print-op-stats{json=false}))")); + "builtin.module(func.func(print-op-stats{json=false}))"), + printToStderr, NULL); // Expect a failure, we haven't registered the print-op-stats pass yet. if (mlirLogicalResultIsSuccess(status)) { fprintf( @@ -195,7 +196,8 @@ void testParsePassPipeline() { status = mlirParsePassPipeline( mlirPassManagerGetAsOpPassManager(pm), mlirStringRefCreateFromCString( - "builtin.module(func.func(print-op-stats{json=false}))")); + "builtin.module(func.func(print-op-stats{json=false}))"), + printToStderr, NULL); // Expect a failure, we haven't registered the print-op-stats pass yet. 
if (mlirLogicalResultIsFailure(status)) { fprintf(stderr, @@ -203,9 +205,7 @@ void testParsePassPipeline() { exit(EXIT_FAILURE); } - // CHECK: Round-trip: builtin.module( - // CHECK-SAME: builtin.module(func.func(print-op-stats{json=false})) - // CHECK-SAME: ) + // CHECK: Round-trip: builtin.module(func.func(print-op-stats{json=false})) fprintf(stderr, "Round-trip: "); mlirPrintPassPipeline(mlirPassManagerGetAsOpPassManager(pm), printToStderr, NULL); @@ -221,7 +221,7 @@ void testParsePassPipeline() { exit(EXIT_FAILURE); } // CHECK: Appended: builtin.module( - // CHECK-SAME: builtin.module(func.func(print-op-stats{json=false})), + // CHECK-SAME: func.func(print-op-stats{json=false}), // CHECK-SAME: func.func(print-op-stats{json=false}) // CHECK-SAME: ) fprintf(stderr, "Appended: "); @@ -242,6 +242,14 @@ void testParseErrorCapture() { MlirOpPassManager opm = mlirPassManagerGetAsOpPassManager(pm); MlirStringRef invalidPipeline = mlirStringRefCreateFromCString("invalid"); + // CHECK: mlirParsePassPipeline: + // CHECK: expected pass pipeline to be wrapped with the anchor operation type + fprintf(stderr, "mlirParsePassPipeline:\n"); + if (mlirLogicalResultIsSuccess( + mlirParsePassPipeline(opm, invalidPipeline, printToStderr, NULL))) + exit(EXIT_FAILURE); + fprintf(stderr, "\n"); + // CHECK: mlirOpPassManagerAddPipeline: // CHECK: 'invalid' does not refer to a registered pass or pass pipeline fprintf(stderr, "mlirOpPassManagerAddPipeline:\n"); @@ -253,6 +261,9 @@ void testParseErrorCapture() { // Make sure all output is going through the callback. 
// CHECK: dontPrint: <> fprintf(stderr, "dontPrint: <"); + if (mlirLogicalResultIsSuccess( + mlirParsePassPipeline(opm, invalidPipeline, dontPrint, NULL))) + exit(EXIT_FAILURE); if (mlirLogicalResultIsSuccess( mlirOpPassManagerAddPipeline(opm, invalidPipeline, dontPrint, NULL))) exit(EXIT_FAILURE); From 66645a03fc8a36b5052113f1466e8baf01137623 Mon Sep 17 00:00:00 2001 From: rkayaith Date: Wed, 19 Oct 2022 23:36:15 -0400 Subject: [PATCH 158/516] [mlir][python] Include anchor op in PassManager.parse The pipeline string must now include the pass manager's anchor op. This makes the parse API properly roundtrip the printed form of a pass manager. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D136405 --- mlir/lib/Bindings/Python/Pass.cpp | 2 +- .../Dialect/SparseTensor/python/tools/sparse_compiler.py | 2 +- .../Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py | 2 +- mlir/test/python/dialects/async_dialect.py | 2 +- mlir/test/python/dialects/gpu.py | 2 +- mlir/test/python/dialects/sparse_tensor/passes.py | 4 ++-- mlir/test/python/execution_engine.py | 2 +- mlir/test/python/integration/dialects/linalg/opsrun.py | 4 ++-- mlir/test/python/pass_manager.py | 6 +++--- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp index 99d67582d1780..f08a4bd2daa7d 100644 --- a/mlir/lib/Bindings/Python/Pass.cpp +++ b/mlir/lib/Bindings/Python/Pass.cpp @@ -85,7 +85,7 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { [](const std::string &pipeline, DefaultingPyMlirContext context) { MlirPassManager passManager = mlirPassManagerCreate(context->get()); PyPrintAccumulator errorMsg; - MlirLogicalResult status = mlirOpPassManagerAddPipeline( + MlirLogicalResult status = mlirParsePassPipeline( mlirPassManagerGetAsOpPassManager(passManager), mlirStringRefCreate(pipeline.data(), pipeline.size()), errorMsg.getCallback(), errorMsg.getUserData()); diff --git 
a/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py b/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py index 174e847f72c23..abdab9738def7 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py @@ -13,7 +13,7 @@ class SparseCompiler: """Sparse compiler class for compiling and building MLIR modules.""" def __init__(self, options: str, opt_level: int, shared_libs: Sequence[str]): - pipeline = f'sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}}' + pipeline = f'builtin.module(sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}})' self.pipeline = pipeline self.opt_level = opt_level self.shared_libs = shared_libs diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py index 6f117f386f531..1ba0d393894b9 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py @@ -16,7 +16,7 @@ class SparseCompiler: """Sparse compiler class for compiling and building MLIR modules.""" def __init__(self, options: str, opt_level: int, shared_libs: Sequence[str]): - pipeline = f'sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}}' + pipeline = f'builtin.module(sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}})' self.pipeline = pipeline self.opt_level = opt_level self.shared_libs = shared_libs diff --git a/mlir/test/python/dialects/async_dialect.py b/mlir/test/python/dialects/async_dialect.py index 6a33bd6b6d030..da3103cecddf2 100644 --- a/mlir/test/python/dialects/async_dialect.py +++ b/mlir/test/python/dialects/async_dialect.py @@ -11,7 +11,7 @@ def run(f): def 
testAsyncPass(): with Context() as context: - PassManager.parse('async-to-async-runtime') + PassManager.parse('any(async-to-async-runtime)') print('SUCCESS') # CHECK-LABEL: testAsyncPass diff --git a/mlir/test/python/dialects/gpu.py b/mlir/test/python/dialects/gpu.py index edf59dfc9c8fb..38bf038a5eeed 100644 --- a/mlir/test/python/dialects/gpu.py +++ b/mlir/test/python/dialects/gpu.py @@ -11,7 +11,7 @@ def run(f): def testGPUPass(): with Context() as context: - PassManager.parse('gpu-kernel-outlining') + PassManager.parse('any(gpu-kernel-outlining)') print('SUCCESS') # CHECK-LABEL: testGPUPass diff --git a/mlir/test/python/dialects/sparse_tensor/passes.py b/mlir/test/python/dialects/sparse_tensor/passes.py index f3510c5ce8169..9319e16e054de 100644 --- a/mlir/test/python/dialects/sparse_tensor/passes.py +++ b/mlir/test/python/dialects/sparse_tensor/passes.py @@ -16,7 +16,7 @@ def run(f): @run def testSparseTensorPass(): with Context() as context: - PassManager.parse('sparsification') - PassManager.parse('sparse-tensor-conversion') + PassManager.parse('any(sparsification)') + PassManager.parse('any(sparse-tensor-conversion)') # CHECK: SUCCESS print('SUCCESS') diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py index c518803789637..7b7ee953ea193 100644 --- a/mlir/test/python/execution_engine.py +++ b/mlir/test/python/execution_engine.py @@ -63,7 +63,7 @@ def testInvalidModule(): def lowerToLLVM(module): pm = PassManager.parse( - "convert-complex-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts") + "builtin.module(convert-complex-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)") pm.run(module) return module diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py index e22f5a0ea3d87..2075ecfc21d01 100644 --- a/mlir/test/python/integration/dialects/linalg/opsrun.py +++ 
b/mlir/test/python/integration/dialects/linalg/opsrun.py @@ -192,10 +192,10 @@ def transform(module, boilerplate): mod = Module.parse("\n".join([str(op) for op in ops]) + boilerplate) pm = PassManager.parse( - "func.func(convert-linalg-to-loops, lower-affine, " + + "builtin.module(func.func(convert-linalg-to-loops, lower-affine, " + "convert-math-to-llvm, convert-scf-to-cf, arith-expand, memref-expand), " + "convert-vector-to-llvm, convert-memref-to-llvm, convert-func-to-llvm," + - "reconcile-unrealized-casts") + "reconcile-unrealized-casts)") pm.run(mod) return mod diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py index a2d56a1f6e031..99170cd042b67 100644 --- a/mlir/test/python/pass_manager.py +++ b/mlir/test/python/pass_manager.py @@ -44,7 +44,7 @@ def testParseSuccess(): # A registered pass should parse successfully. pm = PassManager.parse("builtin.module(func.func(print-op-stats{json=false}))") - # CHECK: Roundtrip: builtin.module(builtin.module(func.func(print-op-stats{json=false}))) + # CHECK: Roundtrip: builtin.module(func.func(print-op-stats{json=false})) log("Roundtrip: ", pm) run(testParseSuccess) @@ -53,7 +53,7 @@ def testParseSuccess(): def testParseFail(): with Context(): try: - pm = PassManager.parse("unknown-pass") + pm = PassManager.parse("any(unknown-pass)") except ValueError as e: # CHECK: ValueError exception: MLIR Textual PassPipeline Parser:1:1: error: # CHECK-SAME: 'unknown-pass' does not refer to a registered pass or pass pipeline @@ -83,7 +83,7 @@ def testInvalidNesting(): # CHECK-LABEL: TEST: testRun def testRunPipeline(): with Context(): - pm = PassManager.parse("print-op-stats{json=false}") + pm = PassManager.parse("builtin.module(print-op-stats{json=false})") module = Module.parse(r"""func.func @successfulParse() { return }""") pm.run(module) # CHECK: Operations encountered: From d97e8cd48239ba6f3e50f92b152e661656ea009d Mon Sep 17 00:00:00 2001 From: rkayaith Date: Thu, 20 Oct 2022 01:04:34 -0400 Subject: 
[PATCH 159/516] [mlir][python] Include anchor op in PassManager constructor This adds an extra argument for specifying the pass manager's anchor op, with a default of `any`. Previously the anchor was always defaulted to `builtin.module`. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D136406 --- mlir/lib/Bindings/Python/Pass.cpp | 9 ++++++--- mlir/test/python/pass_manager.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp index f08a4bd2daa7d..13f1cfa3536ac 100644 --- a/mlir/lib/Bindings/Python/Pass.cpp +++ b/mlir/lib/Bindings/Python/Pass.cpp @@ -56,11 +56,14 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { // Mapping of the top-level PassManager //---------------------------------------------------------------------------- py::class_(m, "PassManager", py::module_local()) - .def(py::init<>([](DefaultingPyMlirContext context) { - MlirPassManager passManager = - mlirPassManagerCreate(context->get()); + .def(py::init<>([](const std::string &anchorOp, + DefaultingPyMlirContext context) { + MlirPassManager passManager = mlirPassManagerCreateOnOperation( + context->get(), + mlirStringRefCreate(anchorOp.data(), anchorOp.size())); return new PyPassManager(passManager); }), + py::arg("anchor_op") = py::str("any"), py::arg("context") = py::none(), "Create a new PassManager for the current (or provided) Context.") .def_property_readonly(MLIR_PYTHON_CAPI_PTR_ATTR, diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py index 99170cd042b67..04e325e13e785 100644 --- a/mlir/test/python/pass_manager.py +++ b/mlir/test/python/pass_manager.py @@ -28,6 +28,17 @@ def testCapsule(): assert pm1 is not None # And does not crash. 
run(testCapsule) +# CHECK-LABEL: TEST: testConstruct +@run +def testConstruct(): + with Context(): + # CHECK: pm1: 'any()' + # CHECK: pm2: 'builtin.module()' + pm1 = PassManager() + pm2 = PassManager("builtin.module") + log(f"pm1: '{pm1}'") + log(f"pm2: '{pm2}'") + # Verify successful round-trip. # CHECK-LABEL: TEST: testParseSuccess From 473d00115244ae8bfc3049a32262a347aeccb460 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Wed, 2 Nov 2022 12:47:50 -0700 Subject: [PATCH 160/516] [mlir][llvmir] Convert attributes for functions without bodies. So far the function argument attributes were only translated for functions with bodies. This change makes sure that this happens for functions without bodies (declarations) as well. This is needed for https://github.com/llvm/llvm-project/issues/58579 Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D137047 --- flang/test/Fir/target.fir | 4 +- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 201 ++++++++++--------- mlir/test/Dialect/LLVMIR/func.mlir | 18 ++ mlir/test/Target/LLVMIR/llvmir.mlir | 34 ++++ 4 files changed, 155 insertions(+), 102 deletions(-) diff --git a/flang/test/Fir/target.fir b/flang/test/Fir/target.fir index 831c75379aa07..f6cf0587fcae9 100644 --- a/flang/test/Fir/target.fir +++ b/flang/test/Fir/target.fir @@ -48,13 +48,13 @@ func.func @gen8() -> !fir.complex<8> { return %5 : !fir.complex<8> } -// I32: declare void @sink4(ptr) +// I32: declare void @sink4(ptr byval({ float, float }) align 4) // X64: declare void @sink4(<2 x float>) // AARCH64: declare void @sink4([2 x float]) // PPC: declare void @sink4(float, float) func.func private @sink4(!fir.complex<4>) -> () -// I32: declare void @sink8(ptr) +// I32: declare void @sink8(ptr byval({ double, double }) align 4) // X64: declare void @sink8(double, double) // AARCH64: declare void @sink8([2 x double]) // PPC: declare void @sink8(double, double) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp 
b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index d1cdc77e4b968..3f808a5d05f7d 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -826,107 +826,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { debugTranslation->translate(func, *llvmFunc); // Add function arguments to the value remapping table. - // If there was noalias info then we decorate each argument accordingly. - unsigned int argIdx = 0; - for (auto kvp : llvm::zip(func.getArguments(), llvmFunc->args())) { - llvm::Argument &llvmArg = std::get<1>(kvp); - BlockArgument mlirArg = std::get<0>(kvp); - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getNoAliasAttrName())) { - // NB: Attribute already verified to be boolean, so check if we can indeed - // attach the attribute to this argument, based on its type. - auto argTy = mlirArg.getType(); - if (!argTy.isa()) - return func.emitError( - "llvm.noalias attribute attached to LLVM non-pointer argument"); - llvmArg.addAttr(llvm::Attribute::AttrKind::NoAlias); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getAlignAttrName())) { - // NB: Attribute already verified to be int, so check if we can indeed - // attach the attribute to this argument, based on its type. 
- auto argTy = mlirArg.getType(); - if (!argTy.isa()) - return func.emitError( - "llvm.align attribute attached to LLVM non-pointer argument"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addAlignmentAttr(llvm::Align(attr.getInt()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getStructRetAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - return func.emitError( - "llvm.sret attribute attached to LLVM non-pointer argument"); - if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError("llvm.sret attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addStructRetAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getByValAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - return func.emitError( - "llvm.byval attribute attached to LLVM non-pointer argument"); - if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError("llvm.byval attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addByValAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getByRefAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - return func.emitError( - "llvm.byref attribute attached to LLVM non-pointer argument"); - if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError("llvm.byref attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addByRefAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getInAllocaAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - 
return func.emitError( - "llvm.inalloca attribute attached to LLVM non-pointer argument"); - if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError( - "llvm.inalloca attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addInAllocaAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType(argIdx, "llvm.nest")) { - auto argTy = mlirArg.getType(); - if (!argTy.isa()) - return func.emitError( - "llvm.nest attribute attached to LLVM non-pointer argument"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addAttribute(llvm::Attribute::Nest)); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getNoUndefAttrName())) { - // llvm.noundef can be added to any argument type. - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addAttribute(llvm::Attribute::NoUndef)); - } - + for (auto [mlirArg, llvmArg] : + llvm::zip(func.getArguments(), llvmFunc->args())) mapValue(mlirArg, &llvmArg); - argIdx++; - } // Check the personality and set it. if (func.getPersonality()) { @@ -986,6 +888,105 @@ LogicalResult ModuleTranslation::convertFunctionSignatures() { if (function->getAttrOfType(LLVMDialect::getReadnoneAttrName())) llvmFunc->setDoesNotAccessMemory(); + // Convert argument attributes. + unsigned int argIdx = 0; + for (auto [mlirArgTy, llvmArg] : + llvm::zip(function.getArgumentTypes(), llvmFunc->args())) { + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getNoAliasAttrName())) { + // NB: Attribute already verified to be boolean, so check if we can + // indeed attach the attribute to this argument, based on its type. 
+ if (!mlirArgTy.isa()) + return function.emitError( + "llvm.noalias attribute attached to LLVM non-pointer argument"); + llvmArg.addAttr(llvm::Attribute::AttrKind::NoAlias); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getAlignAttrName())) { + // NB: Attribute already verified to be int, so check if we can indeed + // attach the attribute to this argument, based on its type. + if (!mlirArgTy.isa()) + return function.emitError( + "llvm.align attribute attached to LLVM non-pointer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAlignmentAttr(llvm::Align(attr.getInt()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getStructRetAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.sret attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.sret attribute attached to LLVM pointer " + "argument of a different type"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addStructRetAttr(convertType(attr.getValue()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getByValAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.byval attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.byval attribute attached to LLVM pointer " + "argument of a different type"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addByValAttr(convertType(attr.getValue()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getByRefAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.byref attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && 
argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.byref attribute attached to LLVM pointer " + "argument of a different type"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addByRefAttr(convertType(attr.getValue()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getInAllocaAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.inalloca attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.inalloca attribute attached to LLVM pointer " + "argument of a different type"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addInAllocaAttr(convertType(attr.getValue()))); + } + + if (auto attr = + function.getArgAttrOfType(argIdx, "llvm.nest")) { + if (!mlirArgTy.isa()) + return function.emitError( + "llvm.nest attribute attached to LLVM non-pointer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::Nest)); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getNoUndefAttrName())) { + // llvm.noundef can be added to any argument type. + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::NoUndef)); + } + ++argIdx; + } + // Forward the pass-through attributes to LLVM. 
if (failed(forwardPassthroughAttributes( function.getLoc(), function.getPassthrough(), llvmFunc))) diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index 17cc6bf564793..7746d5c04b811 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -104,6 +104,24 @@ module { llvm.return } + // CHECK: llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + // CHECK: llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + // CHECK: llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + // CHECK: llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + // CHECK: llvm.func @noundefattr_decl(i32 {llvm.noundef}) + llvm.func @noundefattr_decl(i32 {llvm.noundef}) + // CHECK: llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4 : i64}) + llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) + // CHECK: llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + + // CHECK: llvm.func @variadic(...) llvm.func @variadic(...) 
diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 54eb0f5e04c2c..84c750abcfd73 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1049,31 +1049,65 @@ llvm.func @llvm_noalias(%arg0: !llvm.ptr {llvm.noalias}) { llvm.return } +// CHECK-LABEL: declare void @llvm_noalias_decl(ptr noalias) +llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + +// CHECK-LABEL: define void @byrefattr(ptr byref(i32) % +llvm.func @byrefattr(%arg0: !llvm.ptr {llvm.byref = i32}) { + llvm.return +} + +// CHECK-LABEL: declare void @byrefattr_decl(ptr byref(i32)) +llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + // CHECK-LABEL: define void @byvalattr(ptr byval(i32) % llvm.func @byvalattr(%arg0: !llvm.ptr {llvm.byval = i32}) { llvm.return } +// CHECK-LABEL: declare void @byvalattr_decl(ptr byval(i32)) +llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + // CHECK-LABEL: define void @sretattr(ptr sret(i32) % llvm.func @sretattr(%arg0: !llvm.ptr {llvm.sret = i32}) { llvm.return } +// CHECK-LABEL: declare void @sretattr_decl(ptr sret(i32)) +llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + // CHECK-LABEL: define void @nestattr(ptr nest % llvm.func @nestattr(%arg0: !llvm.ptr {llvm.nest}) { llvm.return } +// CHECK-LABEL: declare void @nestattr_decl(ptr nest) +llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + // CHECK-LABEL: define void @noundefattr(i32 noundef % llvm.func @noundefattr(%arg0: i32 {llvm.noundef}) { llvm.return } +// CHECK-LABEL: declare void @noundefattr_decl(i32 noundef) +llvm.func @noundefattr_decl(i32 {llvm.noundef}) + // CHECK-LABEL: define void @llvm_align(ptr align 4 {{%*.}}) llvm.func @llvm_align(%arg0: !llvm.ptr {llvm.align = 4}) { llvm.return } +// CHECK-LABEL: declare void @llvm_align_decl(ptr align 4) +llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) + +// CHECK-LABEL: define void @inallocaattr(ptr inalloca(i32) % +llvm.func @inallocaattr(%arg0: !llvm.ptr 
{llvm.inalloca = i32}) { + llvm.return +} + +// CHECK-LABEL: declare void @inallocaattr_decl(ptr inalloca(i32)) +llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + // CHECK-LABEL: @llvm_varargs(...) llvm.func @llvm_varargs(...) From e7deca525058778df15e7888ed24974a32c8686c Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 3 Nov 2022 16:06:35 +0000 Subject: [PATCH 161/516] [AArch64] Alter arm_fp16.h to be target-based, not preprocessor based. As the other recent patches, this alters the arm_fp16 intrinsics to be target based, not preprocessor based. Apparently arm_fp16.h is AArch64 only under clang, making this mostly trivial with the TargetGuard infrastructure. Differential Revision: https://reviews.llvm.org/D137256 --- clang/include/clang/Basic/arm_fp16.td | 2 +- clang/test/Sema/aarch64-fp16-target.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td index 79cd16233c104..cb2a09303e8e1 100644 --- a/clang/include/clang/Basic/arm_fp16.td +++ b/clang/include/clang/Basic/arm_fp16.td @@ -14,7 +14,7 @@ include "arm_neon_incl.td" // ARMv8.2-A FP16 intrinsics. 
-let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { // Negate def VNEGSH : SInst<"vneg", "11", "Sh">; diff --git a/clang/test/Sema/aarch64-fp16-target.c b/clang/test/Sema/aarch64-fp16-target.c index 13cee64c52f56..9a921e96e88e5 100644 --- a/clang/test/Sema/aarch64-fp16-target.c +++ b/clang/test/Sema/aarch64-fp16-target.c @@ -7,19 +7,19 @@ __attribute__((target("fullfp16"))) void test_fullfp16(float16_t f16) { - vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} + vabdh_f16(f16, f16); } __attribute__((target("arch=armv8-a+fp16"))) void test_fp16_arch(float16_t f16) { - vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} + vabdh_f16(f16, f16); } __attribute__((target("+fp16"))) void test_fp16(float16_t f16) { - vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} + vabdh_f16(f16, f16); } void undefined(float16_t f16) { - vabdh_f16(f16, f16); // expected-error {{call to undeclared function 'vabdh_f16'}} + vabdh_f16(f16, f16); // expected-error {{'__builtin_neon_vabdh_f16' needs target feature fullfp16}} } From 0eb2f663d27432111e3f0f68a0d586ade4c8036e Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Tue, 25 Oct 2022 18:43:00 -0700 Subject: [PATCH 162/516] [RISCV][CodeGen] Account for LMUL for Vector Integer Arithmetic Instructions It is likely that subtargets act differently for a vector integer arithmetic instruction based on the LMUL. This patch creates separate SchedRead, SchedWrite, WriteRes, ReadAdvance for each relevant LMUL. It also introduces the concept of an "UpperBound LMUL" which allows us to describe how an instruction should behave when the LMUL is unknown. All base instructions use the UpperBound resources because they are not tied to a specific LMUL.
This gives subtargets the flexibility to describe their own upper bounds on each vector instruction. I have a series of patches for the rest of the vector instruction set ready to go, but I would like to first get feedback on the first one of the series (this one). Differential Revision: https://reviews.llvm.org/D136730 --- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 146 ++-- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 718 ++++++++++++------ llvm/lib/Target/RISCV/RISCVScheduleV.td | 287 ++++--- 3 files changed, 750 insertions(+), 401 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 65e36e0aa3b8c..1b4813720d97b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -402,97 +402,124 @@ multiclass VIndexLoadStore EEWList> { multiclass VALU_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + Sched<[WriteVIALUX_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUI_UpperBound, ReadVIALUV_UpperBound, + ReadVMask]>; } multiclass VALU_IV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + Sched<[WriteVIALUX_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUX_UpperBound, ReadVMask]>; } multiclass VALU_IV_X_I funct6, Operand optype = simm5, string vw = "v"> { def X : VALUVX, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUX, ReadVMask]>; + Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound, +
ReadVIALUX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUI_UpperBound, ReadVIALUV_UpperBound, + ReadVMask]>; } multiclass VALU_MV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; + Sched<[WriteVIWALUV_UpperBound, ReadVIWALUV_UpperBound, + ReadVIWALUV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; + Sched<[WriteVIWALUX_UpperBound, ReadVIWALUV_UpperBound, + ReadVIWALUX_UpperBound, ReadVMask]>; } multiclass VMAC_MV_V_X funct6, string vw = "v"> { def V : VALUrVV, - Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>; + Sched<[WriteVIMulAddV_UpperBound, ReadVIMulAddV_UpperBound, + ReadVIMulAddV_UpperBound, ReadVMask]>; def X : VALUrVX, - Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>; + Sched<[WriteVIMulAddX_UpperBound, ReadVIMulAddV_UpperBound, + ReadVIMulAddX_UpperBound, ReadVMask]>; } multiclass VWMAC_MV_V_X funct6, string vw = "v"> { def V : VALUrVV, - Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>; + Sched<[WriteVIWMulAddV_UpperBound, ReadVIWMulAddV_UpperBound, + ReadVIWMulAddV_UpperBound, ReadVMask]>; def X : VALUrVX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + Sched<[WriteVIWMulAddX_UpperBound, ReadVIWMulAddV_UpperBound, + ReadVIWMulAddX_UpperBound, ReadVMask]>; } multiclass VWMAC_MV_X funct6, string vw = "v"> { def X : VALUrVX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + Sched<[WriteVIWMulAddX_UpperBound, ReadVIWMulAddV_UpperBound, + ReadVIWMulAddX_UpperBound, ReadVMask]>; } multiclass VALU_MV_VS2 funct6, bits<5> vs1> { def "" : VALUVs2, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_UpperBound, ReadVExtV_UpperBound, + ReadVMask]>; } multiclass VALUm_IV_V_X_I funct6> { def VM : VALUmVV, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; + 
Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound, ReadVMask]>; def XM : VALUmVX, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUX_UpperBound, ReadVMask]>; def IM : VALUmVI, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; + Sched<[WriteVICALUI_UpperBound, ReadVICALUV_UpperBound, + ReadVMask]>; } multiclass VMRG_IV_V_X_I funct6> { def VM : VALUmVV, - Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + Sched<[WriteVIMergeV_UpperBound, ReadVIMergeV_UpperBound, + ReadVIMergeV_UpperBound, ReadVMask]>; def XM : VALUmVX, - Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + Sched<[WriteVIMergeX_UpperBound, ReadVIMergeV_UpperBound, + ReadVIMergeX_UpperBound, ReadVMask]>; def IM : VALUmVI, - Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; + Sched<[WriteVIMergeI_UpperBound, ReadVIMergeV_UpperBound, + ReadVMask]>; } multiclass VALUm_IV_V_X funct6> { def VM : VALUmVV, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; + Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound, ReadVMask]>; def XM : VALUmVX, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUX_UpperBound, ReadVMask]>; } multiclass VALUNoVm_IV_V_X_I funct6, Operand optype = simm5> { def V : VALUVVNoVm, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; + Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound]>; def X : VALUVXNoVm, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound + , ReadVICALUX_UpperBound]>; def I : VALUVINoVm, - Sched<[WriteVICALUI, ReadVICALUV]>; + Sched<[WriteVICALUI_UpperBound, ReadVICALUV_UpperBound]>; } multiclass VALUNoVm_IV_V_X funct6> { def V : VALUVVNoVm, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; + Sched<[WriteVICALUV_UpperBound, 
ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound]>; def X : VALUVXNoVm, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUX_UpperBound]>; } multiclass VALU_FV_V_F funct6, string vw = "v"> { @@ -675,64 +702,83 @@ multiclass VMIOT_MV_V funct6, bits<5> vs1> { multiclass VSHT_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>; + Sched<[WriteVShiftV_UpperBound, ReadVShiftV_UpperBound, + ReadVShiftV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; + Sched<[WriteVShiftX_UpperBound, ReadVShiftV_UpperBound, + ReadVShiftX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; + Sched<[WriteVShiftI_UpperBound, ReadVShiftV_UpperBound, + ReadVMask]>; } multiclass VNSHT_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>; + Sched<[WriteVNShiftV_UpperBound, ReadVNShiftV_UpperBound, + ReadVNShiftV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>; + Sched<[WriteVNShiftX_UpperBound, ReadVNShiftV_UpperBound, + ReadVNShiftX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>; + Sched<[WriteVNShiftI_UpperBound, ReadVNShiftV_UpperBound, + ReadVMask]>; } multiclass VCMP_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + Sched<[WriteVICmpX_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpI_UpperBound, 
ReadVICmpV_UpperBound, + ReadVMask]>; } multiclass VCMP_IV_X_I funct6, Operand optype = simm5, string vw = "v"> { def X : VALUVX, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpX, ReadVMask]>; + Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpI_UpperBound, ReadVICmpV_UpperBound, + ReadVMask]>; } multiclass VCMP_IV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + Sched<[WriteVICmpX_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpX_UpperBound, ReadVMask]>; } multiclass VMUL_MV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; + Sched<[WriteVIMulV_UpperBound, ReadVIMulV_UpperBound, + ReadVIMulV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; + Sched<[WriteVIMulX_UpperBound, ReadVIMulV_UpperBound, + ReadVIMulX_UpperBound, ReadVMask]>; } multiclass VWMUL_MV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>; + Sched<[WriteVIWMulV_UpperBound, ReadVIWMulV_UpperBound, + ReadVIWMulV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>; + Sched<[WriteVIWMulX_UpperBound, ReadVIWMulV_UpperBound, + ReadVIWMulX_UpperBound, ReadVMask]>; } multiclass VDIV_MV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; + Sched<[WriteVIDivV_UpperBound, ReadVIDivV_UpperBound, + ReadVIDivV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; + Sched<[WriteVIDivX_UpperBound, ReadVIDivV_UpperBound, + ReadVIDivX_UpperBound, ReadVMask]>; } 
multiclass VSALU_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { @@ -1126,15 +1172,15 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1, // op vd, vs1 def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd), (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">, - Sched<[WriteVIMovV, ReadVIMovV]>; + Sched<[WriteVIMovV_UpperBound, ReadVIMovV_UpperBound]>; // op vd, rs1 def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd), (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">, - Sched<[WriteVIMovX, ReadVIMovX]>; + Sched<[WriteVIMovX_UpperBound, ReadVIMovX_UpperBound]>; // op vd, imm def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd), (ins simm5:$imm), "vmv.v.i", "$vd, $imm">, - Sched<[WriteVIMovI]>; + Sched<[WriteVIMovI_UpperBound]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Vector Fixed-Point Arithmetic Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 7c765dd3548bc..06169022a0fa5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1914,6 +1914,10 @@ multiclass VPseudoBinaryV_VV { defm _VV : VPseudoBinary; } +multiclass VPseudoBinaryV_VV_LMUL { + defm _VV : VPseudoBinary; +} + // Similar to VPseudoBinaryV_VV, but uses MxListF. 
multiclass VPseudoBinaryFV_VV { foreach m = MxListF in @@ -1941,6 +1945,10 @@ multiclass VPseudoBinaryV_VX { defm "_VX" : VPseudoBinary; } +multiclass VPseudoBinaryV_VX_LMUL { + defm "_VX" : VPseudoBinary; +} + multiclass VPseudoVSLD1_VX { foreach m = MxList in defm "_VX" : VPseudoBinary, @@ -1967,6 +1975,10 @@ multiclass VPseudoBinaryV_VI { defm _VI : VPseudoBinary; } +multiclass VPseudoBinaryV_VI_LMUL { + defm _VI : VPseudoBinary; +} + multiclass VPseudoVALU_MM { foreach m = MxList in let VLMul = m.value in { @@ -1988,10 +2000,14 @@ multiclass VPseudoBinaryW_VV mxlist = MxListW> { "@earlyclobber $rd">; } -multiclass VPseudoBinaryW_VX { - foreach m = MxListW in - defm "_VX" : VPseudoBinary; +multiclass VPseudoBinaryW_VV_LMUL { + defm _VV : VPseudoBinary; +} + +multiclass VPseudoBinaryW_VX_LMUL { + defm "_VX" : VPseudoBinary; } multiclass VPseudoBinaryW_VF { @@ -2011,9 +2027,15 @@ multiclass VPseudoBinaryW_WV mxlist = MxListW> { } } -multiclass VPseudoBinaryW_WX { - foreach m = MxListW in - defm "_WX" : VPseudoBinary; +multiclass VPseudoBinaryW_WV_LMUL { + defm _WV : VPseudoBinary; + defm _WV : VPseudoTiedBinary; +} + +multiclass VPseudoBinaryW_WX_LMUL { + defm "_WX" : VPseudoBinary; } multiclass VPseudoBinaryW_WF { @@ -2034,59 +2056,70 @@ multiclass VPseudoBinaryV_WV { !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>; } +multiclass VPseudoBinaryV_WV_LMUL { + defm _WV : VPseudoBinary; +} + multiclass VPseudoBinaryV_WX { foreach m = MxListW in defm _WX : VPseudoBinary; } +multiclass VPseudoBinaryV_WX_LMUL { + defm _WX : VPseudoBinary; +} + multiclass VPseudoBinaryV_WI { foreach m = MxListW in defm _WI : VPseudoBinary; } +multiclass VPseudoBinaryV_WI_LMUL { + defm _WI : VPseudoBinary; +} + // For vadc and vsbc, the instruction encoding is reserved if the destination // vector register is v0. 
// For vadc and vsbc, CarryIn == 1 and CarryOut == 0 -multiclass VPseudoBinaryV_VM { - foreach m = MxList in - def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), - m.vrclass, m.vrclass, m, CarryIn, Constraint>; + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; } -multiclass VPseudoTiedBinaryV_VM { - foreach m = MxList in - def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : - VPseudoTiedBinaryCarryIn.R, m.vrclass)), - m.vrclass, m.vrclass, m, CarryIn, Constraint>; + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; } -multiclass VPseudoBinaryV_XM { - foreach m = MxList in - def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), - m.vrclass, GPR, m, CarryIn, Constraint>; + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; } -multiclass VPseudoTiedBinaryV_XM { - foreach m = MxList in - def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": - VPseudoTiedBinaryCarryIn.R, m.vrclass)), - m.vrclass, GPR, m, CarryIn, Constraint>; + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; } multiclass VPseudoVMRG_FM { @@ -2104,41 +2137,48 @@ multiclass VPseudoVMRG_FM { } } -multiclass VPseudoBinaryV_IM { - foreach m = MxList in - def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), - m.vrclass, simm5, m, CarryIn, Constraint>; + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; } -multiclass VPseudoTiedBinaryV_IM { - foreach m = MxList in - def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": - VPseudoTiedBinaryCarryIn.R, m.vrclass)), 
- m.vrclass, simm5, m, CarryIn, Constraint>; + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; } multiclass VPseudoUnaryVMV_V_X_I { foreach m = MxList in { let VLMul = m.value in { - def "_V_" # m.MX : VPseudoUnaryNoDummyMask, - Sched<[WriteVIMovV, ReadVIMovV]>; - def "_X_" # m.MX : VPseudoUnaryNoDummyMask, - Sched<[WriteVIMovX, ReadVIMovX]>; - def "_I_" # m.MX : VPseudoUnaryNoDummyMask, - Sched<[WriteVIMovI]>; - def "_V_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, - Sched<[WriteVIMovV, ReadVIMovV]>; - def "_X_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, - Sched<[WriteVIMovX, ReadVIMovX]>; - def "_I_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, - Sched<[WriteVIMovI]>; + defvar mx = m.MX; + defvar WriteVIMovV_MX = !cast("WriteVIMovV_" # mx); + defvar WriteVIMovX_MX = !cast("WriteVIMovX_" # mx); + defvar WriteVIMovI_MX = !cast("WriteVIMovI_" # mx); + defvar ReadVIMovV_MX = !cast("ReadVIMovV_" # mx); + defvar ReadVIMovX_MX = !cast("ReadVIMovX_" # mx); + + let VLMul = m.value in { + def "_V_" # mx : VPseudoUnaryNoDummyMask, + Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>; + def "_X_" # mx : VPseudoUnaryNoDummyMask, + Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>; + def "_I_" # mx : VPseudoUnaryNoDummyMask, + Sched<[WriteVIMovI_MX]>; + def "_V_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>; + def "_X_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>; + def "_I_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovI_MX]>; + } } } } @@ -2204,15 +2244,19 @@ multiclass PseudoVEXT_VF2 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF2 in { + defvar mx = m.MX; + defvar WriteVExtV_MX = !cast("WriteVExtV_" # mx); + defvar ReadVExtV_MX = !cast("ReadVExtV_" # mx); + let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_TU": 
VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_MASK" : + def "_" # mx : VPseudoUnaryNoMask, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_MASK" : VPseudoUnaryMaskTA, RISCVMaskedPseudo, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } } @@ -2221,15 +2265,19 @@ multiclass PseudoVEXT_VF4 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVExtV_MX = !cast("WriteVExtV_" # mx); + defvar ReadVExtV_MX = !cast("ReadVExtV_" # mx); + let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_MASK" : + def "_" # mx : VPseudoUnaryNoMask, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_MASK" : VPseudoUnaryMaskTA, RISCVMaskedPseudo, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } } @@ -2238,15 +2286,19 @@ multiclass PseudoVEXT_VF8 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF8 in { + defvar mx = m.MX; + defvar WriteVExtV_MX = !cast("WriteVExtV_" # mx); + defvar ReadVExtV_MX = !cast("ReadVExtV_" # mx); + let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_MASK" : + def "_" # mx : VPseudoUnaryNoMask, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_MASK" : VPseudoUnaryMaskTA, 
RISCVMaskedPseudo, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } } @@ -2268,11 +2320,15 @@ multiclass VPseudoBinaryM_VV mxlist = MxList> { !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; } -multiclass VPseudoBinaryM_VX { - foreach m = MxList in - defm "_VX" : - VPseudoBinaryM; +multiclass VPseudoBinaryM_VV_LMUL { + defm _VV : VPseudoBinaryM; +} + +multiclass VPseudoBinaryM_VX { + defm "_VX" : + VPseudoBinaryM; } multiclass VPseudoBinaryM_VF { @@ -2283,10 +2339,9 @@ multiclass VPseudoBinaryM_VF { !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; } -multiclass VPseudoBinaryM_VI { - foreach m = MxList in - defm _VI : VPseudoBinaryM; +multiclass VPseudoBinaryM_VI { + defm _VI : VPseudoBinaryM; } multiclass VPseudoVGTR_VV_VX_VI { @@ -2309,12 +2364,21 @@ multiclass VPseudoVSALU_VV_VX_VI { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; - defm "" : VPseudoBinaryV_VI, - Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVShiftV_MX = !cast("WriteVShiftV_" # mx); + defvar WriteVShiftX_MX = !cast("WriteVShiftX_" # mx); + defvar WriteVShiftI_MX = !cast("WriteVShiftI_" # mx); + defvar ReadVShiftV_MX = !cast("ReadVShiftV_" # mx); + defvar ReadVShiftX_MX = !cast("ReadVShiftX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVShiftV_MX, ReadVShiftV_MX, ReadVShiftV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVShiftX_MX, ReadVShiftV_MX, ReadVShiftX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI_LMUL, + Sched<[WriteVShiftI_MX, ReadVShiftV_MX, ReadVMask]>; + } } multiclass VPseudoVSSHT_VV_VX_VI { @@ -2327,12 +2391,21 @@ multiclass VPseudoVSSHT_VV_VX_VI { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIALUX, 
ReadVIALUV, ReadVIALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_VI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast("WriteVIALUV_" # mx); + defvar WriteVIALUX_MX = !cast("WriteVIALUX_" # mx); + defvar WriteVIALUI_MX = !cast("WriteVIALUI_" # mx); + defvar ReadVIALUV_MX = !cast("ReadVIALUV_" # mx); + defvar ReadVIALUX_MX = !cast("ReadVIALUX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI_LMUL, + Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>; + } } multiclass VPseudoVSALU_VV_VX { @@ -2357,24 +2430,48 @@ multiclass VPseudoVAALU_VV_VX { } multiclass VPseudoVMINMAX_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpV_MX = !cast("WriteVICmpV_" # mx); + defvar WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + } } multiclass VPseudoVMUL_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIMulV_MX = !cast("WriteVIMulV_" # mx); + defvar WriteVIMulX_MX = !cast("WriteVIMulX_" # mx); + defvar ReadVIMulV_MX = !cast("ReadVIMulV_" # mx); + defvar ReadVIMulX_MX = !cast("ReadVIMulX_" # mx); + + defm "" : 
VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIMulV_MX, ReadVIMulV_MX, ReadVIMulV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIMulX_MX, ReadVIMulV_MX, ReadVIMulX_MX, ReadVMask]>; + } } multiclass VPseudoVDIV_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIDivV_MX = !cast("WriteVIDivV_" # mx); + defvar WriteVIDivX_MX = !cast("WriteVIDivX_" # mx); + defvar ReadVIDivV_MX = !cast("ReadVIDivV_" # mx); + defvar ReadVIDivX_MX = !cast("ReadVIDivX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIDivV_MX, ReadVIDivV_MX, ReadVIDivV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIDivX_MX, ReadVIDivV_MX, ReadVIDivX_MX, ReadVMask]>; + } } multiclass VPseudoVFMUL_VV_VF { @@ -2397,10 +2494,18 @@ multiclass VPseudoVFRDIV_VF { } multiclass VPseudoVALU_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast("WriteVIALUV_" # mx); + defvar WriteVIALUX_MX = !cast("WriteVIALUV_" # mx); + defvar ReadVIALUV_MX = !cast("ReadVIALUV_" # mx); + defvar ReadVIALUX_MX = !cast("ReadVIALUX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + } } multiclass VPseudoVSGNJ_VV_VF { @@ -2430,24 +2535,48 @@ multiclass VPseudoVALU_VF { } multiclass VPseudoVALU_VX_VI { - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_VI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar 
WriteVIALUX_MX = !cast("WriteVIALUX_" # mx); + defvar WriteVIALUI_MX = !cast("WriteVIALUI_" # mx); + defvar ReadVIALUV_MX = !cast("ReadVIALUV_" # mx); + defvar ReadVIALUX_MX = !cast("ReadVIALUX_" # mx); + + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI_LMUL, + Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>; + } } multiclass VPseudoVWALU_VV_VX { - defm "" : VPseudoBinaryW_VV, - Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; - defm "" : VPseudoBinaryW_VX, - Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWALUV_MX = !cast("WriteVIWALUV_" # mx); + defvar WriteVIWALUX_MX = !cast("WriteVIWALUX_" # mx); + defvar ReadVIWALUV_MX = !cast("ReadVIWALUV_" # mx); + defvar ReadVIWALUX_MX = !cast("ReadVIWALUX_" # mx); + + defm "" : VPseudoBinaryW_VV_LMUL, + Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryW_VX_LMUL, + Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>; + } } multiclass VPseudoVWMUL_VV_VX { - defm "" : VPseudoBinaryW_VV, - Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>; - defm "" : VPseudoBinaryW_VX, - Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWMulV_MX = !cast("WriteVIWMulV_" # mx); + defvar WriteVIWMulX_MX = !cast("WriteVIWMulX_" # mx); + defvar ReadVIWMulV_MX = !cast("ReadVIWMulV_" # mx); + defvar ReadVIWMulX_MX = !cast("ReadVIWMulX_" # mx); + + defm "" : VPseudoBinaryW_VV_LMUL, + Sched<[WriteVIWMulV_MX, ReadVIWMulV_MX, ReadVIWMulV_MX, ReadVMask]>; + defm "" : VPseudoBinaryW_VX_LMUL, + Sched<[WriteVIWMulX_MX, ReadVIWMulV_MX, ReadVIWMulX_MX, ReadVMask]>; + } } multiclass VPseudoVWMUL_VV_VF { @@ -2458,10 +2587,18 @@ multiclass VPseudoVWMUL_VV_VF { } multiclass VPseudoVWALU_WV_WX { - defm "" : VPseudoBinaryW_WV, - Sched<[WriteVIWALUV, 
ReadVIWALUV, ReadVIWALUV, ReadVMask]>; - defm "" : VPseudoBinaryW_WX, - Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWALUV_MX = !cast("WriteVIWALUV_" # mx); + defvar WriteVIWALUX_MX = !cast("WriteVIWALUX_" # mx); + defvar ReadVIWALUV_MX = !cast("ReadVIWALUV_" # mx); + defvar ReadVIWALUX_MX = !cast("ReadVIWALUX_" # mx); + + defm "" : VPseudoBinaryW_WV_LMUL, + Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryW_WX_LMUL, + Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>; + } } multiclass VPseudoVFWALU_VV_VF { @@ -2479,79 +2616,139 @@ multiclass VPseudoVFWALU_WV_WF { } multiclass VPseudoVMRG_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; - // Tied versions to allow codegen control over the tail elements - defm "" : VPseudoTiedBinaryV_VM, - Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_XM, - Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_IM, - Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIMergeV_MX = !cast("WriteVIMergeV_" # mx); + defvar WriteVIMergeX_MX = !cast("WriteVIMergeX_" # mx); + defvar WriteVIMergeI_MX = !cast("WriteVIMergeI_" # mx); + defvar ReadVIMergeV_MX = !cast("ReadVIMergeV_" # mx); + defvar ReadVIMergeX_MX = !cast("ReadVIMergeX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, 
ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>; + } } multiclass VPseudoVCALU_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; - // Tied versions to allow codegen control over the tail elements - defm "" : VPseudoTiedBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar WriteVICALUI_MX = !cast("WriteVICALUI_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVICALUI_MX, 
ReadVICALUV_MX, ReadVMask]>; + } } multiclass VPseudoVCALU_VM_XM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - // Tied versions to allow codegen control over the tail elements - defm "" : VPseudoTiedBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + } } multiclass VPseudoVCALUM_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar WriteVICALUI_MX = !cast("WriteVICALUI_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; 
+ defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>; + } } multiclass VPseudoVCALUM_VM_XM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + } } multiclass VPseudoVCALUM_V_X_I { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar WriteVICALUI_MX = !cast("WriteVICALUI_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVICALUI_MX, ReadVICALUV_MX]>; + } } multiclass VPseudoVCALUM_V_X { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar 
WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>; + } } multiclass VPseudoVNCLP_WV_WX_WI { @@ -2564,12 +2761,21 @@ multiclass VPseudoVNCLP_WV_WX_WI { } multiclass VPseudoVNSHT_WV_WX_WI { - defm "" : VPseudoBinaryV_WV, - Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>; - defm "" : VPseudoBinaryV_WX, - Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>; - defm "" : VPseudoBinaryV_WI, - Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVNShiftV_MX = !cast("WriteVNShiftV_" # mx); + defvar WriteVNShiftX_MX = !cast("WriteVNShiftX_" # mx); + defvar WriteVNShiftI_MX = !cast("WriteVNShiftI_" # mx); + defvar ReadVNShiftV_MX = !cast("ReadVNShiftV_" # mx); + defvar ReadVNShiftX_MX = !cast("ReadVNShiftX_" # mx); + + defm "" : VPseudoBinaryV_WV_LMUL, + Sched<[WriteVNShiftV_MX, ReadVNShiftV_MX, ReadVNShiftV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_WX_LMUL, + Sched<[WriteVNShiftX_MX, ReadVNShiftV_MX, ReadVNShiftX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_WI_LMUL, + Sched<[WriteVNShiftI_MX, ReadVNShiftV_MX, ReadVMask]>; + } } multiclass VPseudoTernary { + defm _VV : VPseudoTernaryWithPolicy; +} + multiclass VPseudoVSLDV_VX { foreach m = MxList in defm _VX : VPseudoTernaryWithPolicy; } -multiclass VPseudoTernaryV_VX_AAXA { - foreach m = MxList in - defm "_VX" : VPseudoTernaryWithPolicy; +multiclass VPseudoTernaryV_VX_AAXA { + defm "_VX" : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryV_VF_AAXA { @@ -2643,11 +2853,16 @@ multiclass VPseudoTernaryW_VV mxlist = MxListW> { constraint>; } -multiclass VPseudoTernaryW_VX { +multiclass 
VPseudoTernaryW_VV_LMUL { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in - defm "_VX" : VPseudoTernaryWithPolicy; + defm _VV : VPseudoTernaryWithPolicy; +} + +multiclass VPseudoTernaryW_VX { + defvar constraint = "@earlyclobber $rd"; + defm "_VX" : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryW_VF { @@ -2664,10 +2879,20 @@ multiclass VPseudoVSLDV_VI { } multiclass VPseudoVMAC_VV_VX_AAXA { - defm "" : VPseudoTernaryV_VV_AAXA, - Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>; - defm "" : VPseudoTernaryV_VX_AAXA, - Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIMulAddV_MX = !cast("WriteVIMulAddV_" # mx); + defvar WriteVIMulAddX_MX = !cast("WriteVIMulAddX_" # mx); + defvar ReadVIMulAddV_MX = !cast("ReadVIMulAddV_" # mx); + defvar ReadVIMulAddX_MX = !cast("ReadVIMulAddX_" # mx); + + defm "" : VPseudoTernaryV_VV_AAXA_LMUL, + Sched<[WriteVIMulAddV_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX, + ReadVIMulAddV_MX, ReadVMask]>; + defm "" : VPseudoTernaryV_VX_AAXA, + Sched<[WriteVIMulAddX_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX, + ReadVIMulAddX_MX, ReadVMask]>; + } } multiclass VPseudoVMAC_VV_VF_AAXA { @@ -2685,15 +2910,33 @@ multiclass VPseudoVSLD_VX_VI { } multiclass VPseudoVWMAC_VV_VX { - defm "" : VPseudoTernaryW_VV, - Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>; - defm "" : VPseudoTernaryW_VX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWMulAddV_MX = !cast("WriteVIWMulAddV_" # mx); + defvar WriteVIWMulAddX_MX = !cast("WriteVIWMulAddX_" # mx); + defvar ReadVIWMulAddV_MX = !cast("ReadVIWMulAddV_" # mx); + defvar ReadVIWMulAddX_MX = !cast("ReadVIWMulAddX_" # mx); + + defm "" : VPseudoTernaryW_VV_LMUL, + Sched<[WriteVIWMulAddV_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, + 
ReadVIWMulAddV_MX, ReadVMask]>; + defm "" : VPseudoTernaryW_VX, + Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, + ReadVIWMulAddX_MX, ReadVMask]>; + } } multiclass VPseudoVWMAC_VX { - defm "" : VPseudoTernaryW_VX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWMulAddX_MX = !cast("WriteVIWMulAddX_" # mx); + defvar ReadVIWMulAddV_MX= !cast("ReadVIWMulAddV_" # mx); + defvar ReadVIWMulAddX_MX = !cast("ReadVIWMulAddX_" # mx); + + defm "" : VPseudoTernaryW_VX, + Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, + ReadVIWMulAddX_MX, ReadVMask]>; + } } multiclass VPseudoVWMAC_VV_VF { @@ -2704,19 +2947,36 @@ multiclass VPseudoVWMAC_VV_VF { } multiclass VPseudoVCMPM_VV_VX_VI { - defm "" : VPseudoBinaryM_VV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; - defm "" : VPseudoBinaryM_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; - defm "" : VPseudoBinaryM_VI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpV_MX = !cast("WriteVICmpV_" # mx); + defvar WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar WriteVICmpI_MX = !cast("WriteVICmpI_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryM_VV_LMUL, + Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VX, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VI, + Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>; + } } multiclass VPseudoVCMPM_VV_VX { - defm "" : VPseudoBinaryM_VV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; - defm "" : VPseudoBinaryM_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpV_MX = !cast("WriteVICmpV_" # mx); + defvar 
WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryM_VV_LMUL, + Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VX, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + } } multiclass VPseudoVCMPM_VV_VF { @@ -2732,10 +2992,18 @@ multiclass VPseudoVCMPM_VF { } multiclass VPseudoVCMPM_VX_VI { - defm "" : VPseudoBinaryM_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; - defm "" : VPseudoBinaryM_VI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar WriteVICmpI_MX = !cast("WriteVICmpI_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryM_VX, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VI, + Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>; + } } multiclass VPseudoVRED_VS { diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 3c5b48803f4c4..013b9bd61455a 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -9,6 +9,41 @@ //===----------------------------------------------------------------------===// /// Define scheduler resources associated with def operands. +defvar SchedMxList = ["UpperBound", "M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"]; +// Used for widening and narrowing instructions as it doesn't contain M8. 
+defvar SchedMxListW = ["UpperBound", "MF8", "MF4", "MF2", "M1", "M2", "M4"]; + +// Creates SchedWrite for each (name, LMUL) pair for LMUL in lmuls argument +multiclass LMULSchedWrites lmuls = SchedMxList> { + foreach mx = lmuls in { + def name # "_" # mx : SchedWrite; + } +} + +// Creates SchedRead for each (name, LMUL) pair for LMUL in lmuls argument +multiclass LMULSchedReads lmuls = SchedMxList> { + foreach mx = lmuls in { + def name # "_" # mx : SchedRead; + } +} + +// Creates WriteRes for each (name, LMUL, resources) tuple for LMUL +// in lmuls argument +multiclass LMULWriteRes resources, + list lmuls = SchedMxList> { + foreach mx = lmuls in { + def : WriteRes(name # "_" # mx), resources>; + } +} + +// Creates ReadAdvance for each (name, LMUL, val) tuple for LMUL +// in lmuls argument +multiclass LMULReadAdvance lmuls = SchedMxList> { + foreach mx = lmuls in { + def : ReadAdvance(name # "_" # mx), val>; + } +} + // 3.6 Vector Byte Length vlenb def WriteRdVLENB : SchedWrite; @@ -79,55 +114,55 @@ def WriteVST8R : SchedWrite; // 11. Vector Integer Arithmetic Instructions // 11.1. Vector Single-Width Integer Add and Subtract // 11.5. Vector Bitwise Logical Instructions -def WriteVIALUV : SchedWrite; -def WriteVIALUX : SchedWrite; -def WriteVIALUI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIALUV">; +defm "" : LMULSchedWrites<"WriteVIALUX">; +defm "" : LMULSchedWrites<"WriteVIALUI">; // 11.2. Vector Widening Integer Add/Subtract -def WriteVIWALUV : SchedWrite; -def WriteVIWALUX : SchedWrite; -def WriteVIWALUI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIWALUV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWALUX", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWALUI", SchedMxListW>; // 11.3. Vector Integer Extension -def WriteVExtV : SchedWrite; +defm "" : LMULSchedWrites<"WriteVExtV">; // 11.4. 
Vector Integer Arithmetic with Carry or Borrow Instructions -def WriteVICALUV : SchedWrite; -def WriteVICALUX : SchedWrite; -def WriteVICALUI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVICALUV">; +defm "" : LMULSchedWrites<"WriteVICALUX">; +defm "" : LMULSchedWrites<"WriteVICALUI">; // 11.6. Vector Single-Width Bit Shift Instructions -def WriteVShiftV : SchedWrite; -def WriteVShiftX : SchedWrite; -def WriteVShiftI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVShiftV">; +defm "" : LMULSchedWrites<"WriteVShiftX">; +defm "" : LMULSchedWrites<"WriteVShiftI">; // 11.7. Vector Narrowing Integer Right Shift Instructions -def WriteVNShiftV : SchedWrite; -def WriteVNShiftX : SchedWrite; -def WriteVNShiftI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVNShiftV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVNShiftX", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVNShiftI", SchedMxListW>; // 11.8. Vector Integer Comparison Instructions // 11.9. Vector Integer Min/Max Instructions -def WriteVICmpV : SchedWrite; -def WriteVICmpX : SchedWrite; -def WriteVICmpI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVICmpV">; +defm "" : LMULSchedWrites<"WriteVICmpX">; +defm "" : LMULSchedWrites<"WriteVICmpI">; // 11.10. Vector Single-Width Integer Multiply Instructions -def WriteVIMulV : SchedWrite; -def WriteVIMulX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMulV">; +defm "" : LMULSchedWrites<"WriteVIMulX">; // 11.11. Vector Integer Divide Instructions -def WriteVIDivV : SchedWrite; -def WriteVIDivX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIDivV">; +defm "" : LMULSchedWrites<"WriteVIDivX">; // 11.12. Vector Widening Integer Multiply Instructions -def WriteVIWMulV : SchedWrite; -def WriteVIWMulX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIWMulV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWMulX", SchedMxListW>; // 11.13. 
Vector Single-Width Integer Multiply-Add Instructions -def WriteVIMulAddV : SchedWrite; -def WriteVIMulAddX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMulAddV">; +defm "" : LMULSchedWrites<"WriteVIMulAddX">; // 11.14. Vector Widening Integer Multiply-Add Instructions -def WriteVIWMulAddV : SchedWrite; -def WriteVIWMulAddX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIWMulAddV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWMulAddX", SchedMxListW>; // 11.15. Vector Integer Merge Instructions -def WriteVIMergeV : SchedWrite; -def WriteVIMergeX : SchedWrite; -def WriteVIMergeI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMergeV">; +defm "" : LMULSchedWrites<"WriteVIMergeX">; +defm "" : LMULSchedWrites<"WriteVIMergeI">; // 11.16. Vector Integer Move Instructions -def WriteVIMovV : SchedWrite; -def WriteVIMovX : SchedWrite; -def WriteVIMovI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMovV">; +defm "" : LMULSchedWrites<"WriteVIMovX">; +defm "" : LMULSchedWrites<"WriteVIMovI">; // 12. Vector Fixed-Point Arithmetic Instructions // 12.1. Vector Single-Width Saturating Add and Subtract @@ -303,47 +338,47 @@ def ReadVST8R : SchedRead; // 11. Vector Integer Arithmetic Instructions // 11.1. Vector Single-Width Integer Add and Subtract // 11.5. Vector Bitwise Logical Instructions -def ReadVIALUV : SchedRead; -def ReadVIALUX : SchedRead; +defm "" : LMULSchedReads<"ReadVIALUV">; +defm "" : LMULSchedReads<"ReadVIALUX">; // 11.2. Vector Widening Integer Add/Subtract -def ReadVIWALUV : SchedRead; -def ReadVIWALUX : SchedRead; +defm "" : LMULSchedReads<"ReadVIWALUV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVIWALUX", SchedMxListW>; // 11.3. Vector Integer Extension -def ReadVExtV : SchedRead; +defm "" : LMULSchedReads<"ReadVExtV">; // 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions -def ReadVICALUV : SchedRead; -def ReadVICALUX : SchedRead; +defm "" : LMULSchedReads<"ReadVICALUV">; +defm "" : LMULSchedReads<"ReadVICALUX">; // 11.6. 
Vector Single-Width Bit Shift Instructions -def ReadVShiftV : SchedRead; -def ReadVShiftX : SchedRead; +defm "" : LMULSchedReads<"ReadVShiftV">; +defm "" : LMULSchedReads<"ReadVShiftX">; // 11.7. Vector Narrowing Integer Right Shift Instructions -def ReadVNShiftV : SchedRead; -def ReadVNShiftX : SchedRead; +defm "" : LMULSchedReads<"ReadVNShiftV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVNShiftX", SchedMxListW>; // 11.8. Vector Integer Comparison Instructions // 11.9. Vector Integer Min/Max Instructions -def ReadVICmpV : SchedRead; -def ReadVICmpX : SchedRead; +defm "" : LMULSchedReads<"ReadVICmpV">; +defm "" : LMULSchedReads<"ReadVICmpX">; // 11.10. Vector Single-Width Integer Multiply Instructions -def ReadVIMulV : SchedRead; -def ReadVIMulX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMulV">; +defm "" : LMULSchedReads<"ReadVIMulX">; // 11.11. Vector Integer Divide Instructions -def ReadVIDivV : SchedRead; -def ReadVIDivX : SchedRead; +defm "" : LMULSchedReads<"ReadVIDivV">; +defm "" : LMULSchedReads<"ReadVIDivX">; // 11.12. Vector Widening Integer Multiply Instructions -def ReadVIWMulV : SchedRead; -def ReadVIWMulX : SchedRead; +defm "" : LMULSchedReads<"ReadVIWMulV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVIWMulX", SchedMxListW>; // 11.13. Vector Single-Width Integer Multiply-Add Instructions -def ReadVIMulAddV : SchedRead; -def ReadVIMulAddX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMulAddV">; +defm "" : LMULSchedReads<"ReadVIMulAddX">; // 11.14. Vector Widening Integer Multiply-Add Instructions -def ReadVIWMulAddV : SchedRead; -def ReadVIWMulAddX : SchedRead; +defm "" : LMULSchedReads<"ReadVIWMulAddV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVIWMulAddX", SchedMxListW>; // 11.15. Vector Integer Merge Instructions -def ReadVIMergeV : SchedRead; -def ReadVIMergeX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMergeV">; +defm "" : LMULSchedReads<"ReadVIMergeX">; // 11.16. 
Vector Integer Move Instructions -def ReadVIMovV : SchedRead; -def ReadVIMovX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMovV">; +defm "" : LMULSchedReads<"ReadVIMovX">; // 12. Vector Fixed-Point Arithmetic Instructions // 12.1. Vector Single-Width Saturating Add and Subtract @@ -541,42 +576,42 @@ foreach nf=2-8 in { } } -// 12. Vector Integer Arithmetic Instructions -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; +// 11. Vector Integer Arithmetic Instructions +defm "" : LMULWriteRes<"WriteVIALUV", []>; +defm "" : LMULWriteRes<"WriteVIALUX", []>; +defm "" : LMULWriteRes<"WriteVIALUI", []>; +defm "" : LMULWriteRes<"WriteVIWALUV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWALUX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWALUI", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVExtV", []>; +defm "" : LMULWriteRes<"WriteVICALUV", []>; +defm "" : LMULWriteRes<"WriteVICALUX", []>; +defm "" : LMULWriteRes<"WriteVICALUI", []>; +defm "" : LMULWriteRes<"WriteVShiftV", []>; +defm "" : LMULWriteRes<"WriteVShiftX", []>; +defm "" : LMULWriteRes<"WriteVShiftI", []>; +defm "" : LMULWriteRes<"WriteVNShiftV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVNShiftX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVNShiftI", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVICmpV", []>; +defm "" : LMULWriteRes<"WriteVICmpX", []>; +defm "" : LMULWriteRes<"WriteVICmpI", []>; +defm "" : LMULWriteRes<"WriteVIMulV", []>; +defm 
"" : LMULWriteRes<"WriteVIMulX", []>; +defm "" : LMULWriteRes<"WriteVIDivV", []>; +defm "" : LMULWriteRes<"WriteVIDivX", []>; +defm "" : LMULWriteRes<"WriteVIWMulV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWMulX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIMulAddV", []>; +defm "" : LMULWriteRes<"WriteVIMulAddX", []>; +defm "" : LMULWriteRes<"WriteVIWMulAddV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWMulAddX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIMergeV", []>; +defm "" : LMULWriteRes<"WriteVIMergeX", []>; +defm "" : LMULWriteRes<"WriteVIMergeI", []>; +defm "" : LMULWriteRes<"WriteVIMovV", []>; +defm "" : LMULWriteRes<"WriteVIMovX", []>; +defm "" : LMULWriteRes<"WriteVIMovI", []>; // 13. Vector Fixed-Point Arithmetic Instructions def : WriteRes; @@ -700,34 +735,34 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; -// 12. Vector Integer Arithmetic Instructions -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; +// 11. 
Vector Integer Arithmetic Instructions +defm "" : LMULReadAdvance<"ReadVIALUV", 0>; +defm "" : LMULReadAdvance<"ReadVIALUX", 0>; +defm "" : LMULReadAdvance<"ReadVIWALUV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIWALUX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVExtV", 0>; +defm "" : LMULReadAdvance<"ReadVICALUV", 0>; +defm "" : LMULReadAdvance<"ReadVICALUX", 0>; +defm "" : LMULReadAdvance<"ReadVShiftV", 0>; +defm "" : LMULReadAdvance<"ReadVShiftX", 0>; +defm "" : LMULReadAdvance<"ReadVNShiftV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVNShiftX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVICmpV", 0>; +defm "" : LMULReadAdvance<"ReadVICmpX", 0>; +defm "" : LMULReadAdvance<"ReadVIMulV", 0>; +defm "" : LMULReadAdvance<"ReadVIMulX", 0>; +defm "" : LMULReadAdvance<"ReadVIDivV", 0>; +defm "" : LMULReadAdvance<"ReadVIDivX", 0>; +defm "" : LMULReadAdvance<"ReadVIWMulV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIWMulX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIMulAddV", 0>; +defm "" : LMULReadAdvance<"ReadVIMulAddX", 0>; +defm "" : LMULReadAdvance<"ReadVIWMulAddV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIWMulAddX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIMergeV", 0>; +defm "" : LMULReadAdvance<"ReadVIMergeX", 0>; +defm "" : LMULReadAdvance<"ReadVIMovV", 0>; +defm "" : LMULReadAdvance<"ReadVIMovX", 0>; // 13. Vector Fixed-Point Arithmetic Instructions def : ReadAdvance; From 6f04011f15aca2581571989f349b7a3f332bb4f6 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Thu, 3 Nov 2022 09:07:44 -0700 Subject: [PATCH 163/516] [mlir][llvmir] Add support for llvm.signext/zeroext function attributes. This change-set adds basic support for llvm.signext and llvm.zeroext attributes, and makes sure that the attributes are translated to LLVM IR when attached to arguments. 
This is needed for https://github.com/llvm/llvm-project/issues/58579 Differential Revision: https://reviews.llvm.org/D137048 --- .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 2 ++ mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 19 +++++++++++++++++++ mlir/test/Target/LLVMIR/llvmir-invalid.mlir | 14 ++++++++++++++ mlir/test/Target/LLVMIR/llvmir.mlir | 16 ++++++++++++++++ 4 files changed, 51 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index ec6d565b67b00..cf39f2cb2d49c 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -49,6 +49,8 @@ def LLVM_Dialect : Dialect { static StringRef getStructRetAttrName() { return "llvm.sret"; } static StringRef getInAllocaAttrName() { return "llvm.inalloca"; } static StringRef getNoUndefAttrName() { return "llvm.noundef"; } + static StringRef getSExtAttrName() { return "llvm.signext"; } + static StringRef getZExtAttrName() { return "llvm.zeroext"; } /// Verifies if the attribute is a well-formed value for "llvm.struct_attrs" static LogicalResult verifyStructAttr( diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 3f808a5d05f7d..7c0e3efe8e0b6 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -984,6 +984,25 @@ LogicalResult ModuleTranslation::convertFunctionSignatures() { llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) .addAttribute(llvm::Attribute::NoUndef)); } + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getSExtAttrName())) { + // llvm.signext can be added to any integer argument type. 
+ if (!mlirArgTy.isa()) + return function.emitError( + "llvm.signext attribute attached to LLVM non-integer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::SExt)); + } + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getZExtAttrName())) { + // llvm.zeroext can be added to any integer argument type. + if (!mlirArgTy.isa()) + return function.emitError( + "llvm.zeroext attribute attached to LLVM non-integer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::ZExt)); + } + ++argIdx; } diff --git a/mlir/test/Target/LLVMIR/llvmir-invalid.mlir b/mlir/test/Target/LLVMIR/llvmir-invalid.mlir index 7217979dbf005..4627df83eb425 100644 --- a/mlir/test/Target/LLVMIR/llvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-invalid.mlir @@ -83,6 +83,20 @@ llvm.func @invalid_align(%arg0 : f32 {llvm.align = 4}) -> f32 { // ----- +// expected-error @below{{llvm.signext attribute attached to LLVM non-integer argument}} +llvm.func @invalid_signext(%arg0: f32 {llvm.signext}) { + "llvm.return"() : () -> () +} + +// ----- + +// expected-error @below{{llvm.zeroext attribute attached to LLVM non-integer argument}} +llvm.func @invalid_zeroext(%arg0: f32 {llvm.zeroext}) { + "llvm.return"() : () -> () +} + +// ----- + llvm.func @no_non_complex_struct() -> !llvm.array<2 x array<2 x array<2 x struct<(i32)>>>> { // expected-error @below{{expected struct type to be a complex number}} %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : tensor<2x2x2xi32>) : !llvm.array<2 x array<2 x array<2 x struct<(i32)>>>> diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 84c750abcfd73..2c4f54c765352 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1108,6 +1108,22 @@ llvm.func @inallocaattr(%arg0: !llvm.ptr {llvm.inalloca = i32}) { // CHECK-LABEL: declare void 
@inallocaattr_decl(ptr inalloca(i32)) llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) +// CHECK-LABEL: define void @signextattr(i1 signext % +llvm.func @signextattr(%arg0: i1 {llvm.signext}) { + llvm.return +} + +// CHECK-LABEL: declare void @signextattr_decl(i1 signext) +llvm.func @signextattr_decl(i1 {llvm.signext}) + +// CHECK-LABEL: define void @zeroextattr(i1 zeroext % +llvm.func @zeroextattr(%arg0: i1 {llvm.zeroext}) { + llvm.return +} + +// CHECK-LABEL: declare void @zeroextattr_decl(i1 zeroext) +llvm.func @zeroextattr_decl(i1 {llvm.zeroext}) + // CHECK-LABEL: @llvm_varargs(...) llvm.func @llvm_varargs(...) From 74d5c3c0f078bf88b2462e05fa52c2ec1ce11b48 Mon Sep 17 00:00:00 2001 From: Peter Steinfeld Date: Wed, 2 Nov 2022 14:31:14 -0700 Subject: [PATCH 164/516] [Flang] Run clang-format on all flang files This will make it easier for me to do reviews. Differential Revision: https://reviews.llvm.org/D137291 --- .../flang/Frontend/CompilerInvocation.h | 7 +- flang/include/flang/Frontend/FrontendAction.h | 3 +- .../include/flang/Frontend/FrontendActions.h | 20 +++--- .../flang/Frontend/TextDiagnosticBuffer.h | 2 +- .../flang/Frontend/TextDiagnosticPrinter.h | 2 +- .../flang/Optimizer/Builder/Runtime/Derived.h | 2 +- flang/include/flang/Semantics/symbol.h | 43 ++++++----- flang/lib/Evaluate/characteristics.cpp | 4 +- flang/lib/Evaluate/fold-integer.cpp | 66 +++++++++-------- flang/lib/Evaluate/fold-real.cpp | 18 ++--- flang/lib/Frontend/CompilerInstance.cpp | 3 +- flang/lib/Frontend/CompilerInvocation.cpp | 71 ++++++++++--------- flang/lib/Frontend/FrontendOptions.cpp | 13 ++-- flang/lib/Frontend/TextDiagnosticPrinter.cpp | 2 +- .../ExecuteCompilerInvocation.cpp | 7 +- flang/lib/Optimizer/Builder/MutableBox.cpp | 45 ++++++------ flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 22 +++--- flang/lib/Semantics/check-acc-structure.h | 1 - flang/lib/Semantics/check-declarations.cpp | 4 +- flang/lib/Semantics/data-to-inits.h | 2 +- 
flang/lib/Semantics/expression.cpp | 62 ++++++++-------- flang/lib/Semantics/mod-file.cpp | 2 +- flang/lib/Semantics/symbol.cpp | 3 +- flang/lib/Semantics/tools.cpp | 4 +- flang/tools/flang-driver/driver.cpp | 11 +-- flang/unittests/Optimizer/FIRContextTest.cpp | 4 +- flang/unittests/Runtime/Time.cpp | 1 - 27 files changed, 215 insertions(+), 209 deletions(-) diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index a5895451bb74a..58479c8418515 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -96,9 +96,10 @@ class CompilerInvocation : public CompilerInvocationBase { bool warnAsErr = false; - /// This flag controls the unparsing and is used to decide whether to print out - /// the semantically analyzed version of an object or expression or the plain - /// version that does not include any information from semantic analysis. + /// This flag controls the unparsing and is used to decide whether to print + /// out the semantically analyzed version of an object or expression or the + /// plain version that does not include any information from semantic + /// analysis. 
bool useAnalyzedObjectsForUnparse = true; // Fortran Dialect options diff --git a/flang/include/flang/Frontend/FrontendAction.h b/flang/include/flang/Frontend/FrontendAction.h index e6e268c875e80..266050084f4c0 100644 --- a/flang/include/flang/Frontend/FrontendAction.h +++ b/flang/include/flang/Frontend/FrontendAction.h @@ -135,7 +135,8 @@ class FrontendAction { } private: - template bool reportFatalErrors(const char (&message)[N]); + template + bool reportFatalErrors(const char (&message)[N]); }; } // namespace Fortran::frontend diff --git a/flang/include/flang/Frontend/FrontendActions.h b/flang/include/flang/Frontend/FrontendActions.h index 3f50d320c1f3b..eb9dda75c516c 100644 --- a/flang/include/flang/Frontend/FrontendActions.h +++ b/flang/include/flang/Frontend/FrontendActions.h @@ -29,8 +29,12 @@ namespace Fortran::frontend { // TODO: This is a copy from f18.cpp. It doesn't really belong here and should // be moved to a more suitable place in future. struct MeasurementVisitor { - template bool Pre(const A &) { return true; } - template void Post(const A &) { + template + bool Pre(const A &) { + return true; + } + template + void Post(const A &) { ++objects; bytes += sizeof(A); } @@ -148,8 +152,8 @@ class PluginParseTreeAction : public PrescanAndSemaAction { /// \param extension The extension to use for the output file (ignored when /// the user decides to print to stdout via `-o -`) /// \return Null on error, ostream for the output file otherwise - std::unique_ptr createOutputFile( - llvm::StringRef extension); + std::unique_ptr + createOutputFile(llvm::StringRef extension); }; //===----------------------------------------------------------------------===// @@ -184,10 +188,10 @@ class DebugDumpAllAction : public PrescanAndSemaDebugAction { /// maintain some level of consistency/similarity between the drivers. 
enum class BackendActionTy { Backend_EmitAssembly, ///< Emit native assembly files - Backend_EmitObj, ///< Emit native object files - Backend_EmitBC, ///< Emit LLVM bitcode files - Backend_EmitLL, ///< Emit human-readable LLVM assembly - Backend_EmitMLIR ///< Emit MLIR files + Backend_EmitObj, ///< Emit native object files + Backend_EmitBC, ///< Emit LLVM bitcode files + Backend_EmitLL, ///< Emit human-readable LLVM assembly + Backend_EmitMLIR ///< Emit MLIR files }; /// Abstract base class for actions that generate code (MLIR, LLVM IR, assembly diff --git a/flang/include/flang/Frontend/TextDiagnosticBuffer.h b/flang/include/flang/Frontend/TextDiagnosticBuffer.h index fb1028a36ea35..7eba843661328 100644 --- a/flang/include/flang/Frontend/TextDiagnosticBuffer.h +++ b/flang/include/flang/Frontend/TextDiagnosticBuffer.h @@ -45,7 +45,7 @@ class TextDiagnosticBuffer : public clang::DiagnosticConsumer { public: void HandleDiagnostic(clang::DiagnosticsEngine::Level diagLevel, - const clang::Diagnostic &info) override; + const clang::Diagnostic &info) override; /// Flush the buffered diagnostics to a given diagnostic engine. 
void flushDiagnostics(clang::DiagnosticsEngine &diags) const; diff --git a/flang/include/flang/Frontend/TextDiagnosticPrinter.h b/flang/include/flang/Frontend/TextDiagnosticPrinter.h index 3e6e6a1977d1d..0e092a0a012e0 100644 --- a/flang/include/flang/Frontend/TextDiagnosticPrinter.h +++ b/flang/include/flang/Frontend/TextDiagnosticPrinter.h @@ -51,7 +51,7 @@ class TextDiagnosticPrinter : public clang::DiagnosticConsumer { void setPrefix(std::string value) { prefix = std::move(value); } void HandleDiagnostic(clang::DiagnosticsEngine::Level level, - const clang::Diagnostic &info) override; + const clang::Diagnostic &info) override; }; } // namespace Fortran::frontend diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Derived.h b/flang/include/flang/Optimizer/Builder/Runtime/Derived.h index 816d561d38913..239eab1d4e418 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Derived.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Derived.h @@ -17,7 +17,7 @@ class Location; namespace fir { class FirOpBuilder; class RecordType; -} +} // namespace fir namespace fir::runtime { diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 9c3c22ca8ad32..ad01b1235c537 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -625,28 +625,27 @@ class Symbol { bool IsSubprogram() const; bool IsFromModFile() const; bool HasExplicitInterface() const { - return common::visit(common::visitors{ - [](const SubprogramDetails &) { return true; }, - [](const SubprogramNameDetails &) { return true; }, - [&](const ProcEntityDetails &x) { - return attrs_.test(Attr::INTRINSIC) || - x.HasExplicitInterface(); - }, - [](const ProcBindingDetails &x) { - return x.symbol().HasExplicitInterface(); - }, - [](const UseDetails &x) { - return x.symbol().HasExplicitInterface(); - }, - [](const HostAssocDetails &x) { - return x.symbol().HasExplicitInterface(); - }, - [](const GenericDetails &x) { - return 
x.specific() && - x.specific()->HasExplicitInterface(); - }, - [](const auto &) { return false; }, - }, + return common::visit( + common::visitors{ + [](const SubprogramDetails &) { return true; }, + [](const SubprogramNameDetails &) { return true; }, + [&](const ProcEntityDetails &x) { + return attrs_.test(Attr::INTRINSIC) || x.HasExplicitInterface(); + }, + [](const ProcBindingDetails &x) { + return x.symbol().HasExplicitInterface(); + }, + [](const UseDetails &x) { + return x.symbol().HasExplicitInterface(); + }, + [](const HostAssocDetails &x) { + return x.symbol().HasExplicitInterface(); + }, + [](const GenericDetails &x) { + return x.specific() && x.specific()->HasExplicitInterface(); + }, + [](const auto &) { return false; }, + }, details_); } diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index cf43bab6a5eb7..1795751fbf045 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -506,9 +506,7 @@ static std::optional CharacterizeProcedure( } return intrinsic; } - const semantics::ProcInterface &interface { - proc.interface() - }; + const semantics::ProcInterface &interface { proc.interface() }; if (const semantics::Symbol * interfaceSymbol{interface.symbol()}) { auto interface { CharacterizeProcedure(*interfaceSymbol, context, seenProcs) diff --git a/flang/lib/Evaluate/fold-integer.cpp b/flang/lib/Evaluate/fold-integer.cpp index bc4bd3b19ea21..603c4a46f9b52 100644 --- a/flang/lib/Evaluate/fold-integer.cpp +++ b/flang/lib/Evaluate/fold-integer.cpp @@ -763,20 +763,20 @@ Expr> FoldIntrinsicFunction( context, std::move(funcRef), &Scalar::IEOR, Scalar{}); } else if (name == "ishft") { return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFunc([&](const Scalar &i, - const Scalar &pos) -> Scalar { - auto posVal{static_cast(pos.ToInt64())}; - if (posVal < -i.bits) { - context.messages().Say( - "SHIFT=%d count for ishft is less than %d"_err_en_US, posVal, - -i.bits); 
- } else if (posVal > i.bits) { - context.messages().Say( - "SHIFT=%d count for ishft is greater than %d"_err_en_US, posVal, - i.bits); - } - return i.ISHFT(posVal); - })); + ScalarFunc( + [&](const Scalar &i, const Scalar &pos) -> Scalar { + auto posVal{static_cast(pos.ToInt64())}; + if (posVal < -i.bits) { + context.messages().Say( + "SHIFT=%d count for ishft is less than %d"_err_en_US, + posVal, -i.bits); + } else if (posVal > i.bits) { + context.messages().Say( + "SHIFT=%d count for ishft is greater than %d"_err_en_US, + posVal, i.bits); + } + return i.ISHFT(posVal); + })); } else if (name == "ishftc") { if (args.at(2)) { // SIZE= is present return FoldElementalIntrinsic(context, @@ -940,16 +940,15 @@ Expr> FoldIntrinsicFunction( })); } else if (name == "modulo") { return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFuncWithContext( - [](FoldingContext &context, const Scalar &x, - const Scalar &y) -> Scalar { - auto result{x.MODULO(y)}; - if (result.overflow) { - context.messages().Say( - "modulo() folding overflowed"_warn_en_US); - } - return result.value; - })); + ScalarFuncWithContext([](FoldingContext &context, + const Scalar &x, + const Scalar &y) -> Scalar { + auto result{x.MODULO(y)}; + if (result.overflow) { + context.messages().Say("modulo() folding overflowed"_warn_en_US); + } + return result.value; + })); } else if (name == "not") { return FoldElementalIntrinsic( context, std::move(funcRef), &Scalar::NOT); @@ -1062,16 +1061,15 @@ Expr> FoldIntrinsicFunction( })); } else if (name == "sign") { return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFunc( - [&context](const Scalar &j, const Scalar &k) -> Scalar { - typename Scalar::ValueWithOverflow result{j.SIGN(k)}; - if (result.overflow) { - context.messages().Say( - "sign(integer(kind=%d)) folding overflowed"_warn_en_US, - KIND); - } - return result.value; - })); + ScalarFunc([&context](const Scalar &j, + const Scalar &k) -> Scalar { + typename Scalar::ValueWithOverflow 
result{j.SIGN(k)}; + if (result.overflow) { + context.messages().Say( + "sign(integer(kind=%d)) folding overflowed"_warn_en_US, KIND); + } + return result.value; + })); } else if (name == "size") { if (auto shape{GetContextFreeShape(context, args[0])}) { if (auto &dimArg{args[1]}) { // DIM= is present, get one extent diff --git a/flang/lib/Evaluate/fold-real.cpp b/flang/lib/Evaluate/fold-real.cpp index 38ece3f21edd2..ef90d12446454 100644 --- a/flang/lib/Evaluate/fold-real.cpp +++ b/flang/lib/Evaluate/fold-real.cpp @@ -127,15 +127,15 @@ Expr> FoldIntrinsicFunction( ? common::RoundingMode::ToZero : common::RoundingMode::TiesAwayFromZero}; return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFunc([&name, &context, mode]( - const Scalar &x) -> Scalar { - ValueWithRealFlags> y{x.ToWholeNumber(mode)}; - if (y.flags.test(RealFlag::Overflow)) { - context.messages().Say( - "%s intrinsic folding overflow"_warn_en_US, name); - } - return y.value; - })); + ScalarFunc( + [&name, &context, mode](const Scalar &x) -> Scalar { + ValueWithRealFlags> y{x.ToWholeNumber(mode)}; + if (y.flags.test(RealFlag::Overflow)) { + context.messages().Say( + "%s intrinsic folding overflow"_warn_en_US, name); + } + return y.value; + })); } else if (name == "dim") { return FoldElementalIntrinsic(context, std::move(funcRef), ScalarFunc( diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp index 951b8d179bed0..0d01608434618 100644 --- a/flang/lib/Frontend/CompilerInstance.cpp +++ b/flang/lib/Frontend/CompilerInstance.cpp @@ -117,7 +117,8 @@ CompilerInstance::createOutputFileImpl(llvm::StringRef outputFilePath, std::unique_ptr os; std::error_code error; - os.reset(new llvm::raw_fd_ostream(outputFilePath, error, + os.reset(new llvm::raw_fd_ostream( + outputFilePath, error, (binary ? 
llvm::sys::fs::OF_None : llvm::sys::fs::OF_TextWithCRLF))); if (error) { return llvm::errorCodeToError(error); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 10c73169d0d02..252e1a7e697a5 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -379,12 +379,13 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } // Set fortranForm based on options -ffree-form and -ffixed-form. - if (const auto *arg = args.getLastArg(clang::driver::options::OPT_ffixed_form, - clang::driver::options::OPT_ffree_form)) { + if (const auto *arg = + args.getLastArg(clang::driver::options::OPT_ffixed_form, + clang::driver::options::OPT_ffree_form)) { opts.fortranForm = arg->getOption().matches(clang::driver::options::OPT_ffixed_form) - ? FortranForm::FixedForm - : FortranForm::FreeForm; + ? FortranForm::FixedForm + : FortranForm::FreeForm; } // Set fixedFormColumns based on -ffixed-line-length= @@ -425,22 +426,26 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, opts.features.Enable( Fortran::common::LanguageFeature::ImplicitNoneTypeAlways, args.hasFlag(clang::driver::options::OPT_fimplicit_none, - clang::driver::options::OPT_fno_implicit_none, false)); + clang::driver::options::OPT_fno_implicit_none, false)); // -f{no-}backslash opts.features.Enable(Fortran::common::LanguageFeature::BackslashEscapes, - args.hasFlag(clang::driver::options::OPT_fbackslash, - clang::driver::options::OPT_fno_backslash, false)); + args.hasFlag(clang::driver::options::OPT_fbackslash, + clang::driver::options::OPT_fno_backslash, + false)); // -f{no-}logical-abbreviations - opts.features.Enable(Fortran::common::LanguageFeature::LogicalAbbreviations, + opts.features.Enable( + Fortran::common::LanguageFeature::LogicalAbbreviations, args.hasFlag(clang::driver::options::OPT_flogical_abbreviations, - 
clang::driver::options::OPT_fno_logical_abbreviations, false)); + clang::driver::options::OPT_fno_logical_abbreviations, + false)); // -f{no-}xor-operator - opts.features.Enable(Fortran::common::LanguageFeature::XOROperator, + opts.features.Enable( + Fortran::common::LanguageFeature::XOROperator, args.hasFlag(clang::driver::options::OPT_fxor_operator, - clang::driver::options::OPT_fno_xor_operator, false)); + clang::driver::options::OPT_fno_xor_operator, false)); // -fno-automatic if (args.hasArg(clang::driver::options::OPT_fno_automatic)) { @@ -494,11 +499,11 @@ static std::string getOpenMPHeadersDir() { /// /// \param [in] opts The preprocessor options instance /// \param [out] args The list of input arguments -static void parsePreprocessorArgs( - Fortran::frontend::PreprocessorOptions &opts, llvm::opt::ArgList &args) { +static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts, + llvm::opt::ArgList &args) { // Add macros from the command line. - for (const auto *currentArg : args.filtered( - clang::driver::options::OPT_D, clang::driver::options::OPT_U)) { + for (const auto *currentArg : args.filtered(clang::driver::options::OPT_D, + clang::driver::options::OPT_U)) { if (currentArg->getOption().matches(clang::driver::options::OPT_D)) { opts.addMacroDef(currentArg->getValue()); } else { @@ -513,7 +518,7 @@ static void parsePreprocessorArgs( // Prepend the ordered list of -intrinsic-modules-path // to the default location to search. for (const auto *currentArg : - args.filtered(clang::driver::options::OPT_fintrinsic_modules_path)) + args.filtered(clang::driver::options::OPT_fintrinsic_modules_path)) opts.searchDirectoriesFromIntrModPath.emplace_back(currentArg->getValue()); // -cpp/-nocpp @@ -521,8 +526,8 @@ static void parsePreprocessorArgs( clang::driver::options::OPT_cpp, clang::driver::options::OPT_nocpp)) opts.macrosFlag = (currentArg->getOption().matches(clang::driver::options::OPT_cpp)) - ? 
PPMacrosFlag::Include - : PPMacrosFlag::Exclude; + ? PPMacrosFlag::Include + : PPMacrosFlag::Exclude; opts.noReformat = args.hasArg(clang::driver::options::OPT_fno_reformat); opts.noLineDirectives = args.hasArg(clang::driver::options::OPT_P); @@ -531,7 +536,7 @@ static void parsePreprocessorArgs( /// Parses all semantic related arguments and populates the variables /// options accordingly. Returns false if new errors are generated. static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, - clang::DiagnosticsEngine &diags) { + clang::DiagnosticsEngine &diags) { unsigned numErrorsBefore = diags.getNumErrors(); // -J/module-dir option @@ -542,7 +547,7 @@ static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, if (moduleDirList.size() > 1) { const unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Only one '-module-dir/-J' option allowed"); + "Only one '-module-dir/-J' option allowed"); diags.Report(diagID); } if (moduleDirList.size() == 1) @@ -570,7 +575,7 @@ static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, /// Parses all diagnostics related arguments and populates the variables /// options accordingly. Returns false if new errors are generated. static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, - clang::DiagnosticsEngine &diags) { + clang::DiagnosticsEngine &diags) { unsigned numErrorsBefore = diags.getNumErrors(); // -Werror option @@ -583,7 +588,7 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } else { const unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Only `-Werror` is supported currently."); + "Only `-Werror` is supported currently."); diags.Report(diagID); } } @@ -598,7 +603,7 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, /// Parses all Dialect related arguments and populates the variables /// options accordingly. 
Returns false if new errors are generated. static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, - clang::DiagnosticsEngine &diags) { + clang::DiagnosticsEngine &diags) { unsigned numErrorsBefore = diags.getNumErrors(); // -fdefault* family @@ -615,9 +620,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, if (!args.hasArg(clang::driver::options::OPT_fdefault_real_8)) { // -fdefault-double-8 has to be used with -fdefault-real-8 // to be compatible with gfortran - const unsigned diagID = - diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Use of `-fdefault-double-8` requires `-fdefault-real-8`"); + const unsigned diagID = diags.getCustomDiagID( + clang::DiagnosticsEngine::Error, + "Use of `-fdefault-double-8` requires `-fdefault-real-8`"); diags.Report(diagID); } // https://gcc.gnu.org/onlinedocs/gfortran/Fortran-Dialect-Options.html @@ -651,7 +656,7 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } else { const unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Only -std=f2018 is allowed currently."); + "Only -std=f2018 is allowed currently."); diags.Report(diagID); } } @@ -812,12 +817,12 @@ void CompilerInvocation::setDefaultPredefinitions() { // Populate the macro list with version numbers and other predefinitions. 
fortranOptions.predefinitions.emplace_back("__flang__", "1"); - fortranOptions.predefinitions.emplace_back( - "__flang_major__", FLANG_VERSION_MAJOR_STRING); - fortranOptions.predefinitions.emplace_back( - "__flang_minor__", FLANG_VERSION_MINOR_STRING); - fortranOptions.predefinitions.emplace_back( - "__flang_patchlevel__", FLANG_VERSION_PATCHLEVEL_STRING); + fortranOptions.predefinitions.emplace_back("__flang_major__", + FLANG_VERSION_MAJOR_STRING); + fortranOptions.predefinitions.emplace_back("__flang_minor__", + FLANG_VERSION_MINOR_STRING); + fortranOptions.predefinitions.emplace_back("__flang_patchlevel__", + FLANG_VERSION_PATCHLEVEL_STRING); // Add predefinitions based on extensions enabled if (frontendOptions.features.IsEnabled( diff --git a/flang/lib/Frontend/FrontendOptions.cpp b/flang/lib/Frontend/FrontendOptions.cpp index 8353858ff5094..504fac6cd6fb9 100644 --- a/flang/lib/Frontend/FrontendOptions.cpp +++ b/flang/lib/Frontend/FrontendOptions.cpp @@ -17,22 +17,23 @@ using namespace Fortran::frontend; bool Fortran::frontend::isFixedFormSuffix(llvm::StringRef suffix) { // Note: Keep this list in-sync with flang/test/lit.cfg.py return suffix == "f77" || suffix == "f" || suffix == "F" || suffix == "ff" || - suffix == "for" || suffix == "FOR" || suffix == "fpp" || suffix == "FPP"; + suffix == "for" || suffix == "FOR" || suffix == "fpp" || + suffix == "FPP"; } bool Fortran::frontend::isFreeFormSuffix(llvm::StringRef suffix) { // Note: Keep this list in-sync with flang/test/lit.cfg.py // TODO: Add Cuda Fortan files (i.e. `*.cuf` and `*.CUF`). 
return suffix == "f90" || suffix == "F90" || suffix == "ff90" || - suffix == "f95" || suffix == "F95" || suffix == "ff95" || - suffix == "f03" || suffix == "F03" || suffix == "f08" || - suffix == "F08" || suffix == "f18" || suffix == "F18"; + suffix == "f95" || suffix == "F95" || suffix == "ff95" || + suffix == "f03" || suffix == "F03" || suffix == "f08" || + suffix == "F08" || suffix == "f18" || suffix == "F18"; } bool Fortran::frontend::isToBePreprocessed(llvm::StringRef suffix) { return suffix == "F" || suffix == "FOR" || suffix == "fpp" || - suffix == "FPP" || suffix == "F90" || suffix == "F95" || - suffix == "F03" || suffix == "F08" || suffix == "F18"; + suffix == "FPP" || suffix == "F90" || suffix == "F95" || + suffix == "F03" || suffix == "F08" || suffix == "F18"; } InputKind FrontendOptions::getInputKindForExtension(llvm::StringRef extension) { diff --git a/flang/lib/Frontend/TextDiagnosticPrinter.cpp b/flang/lib/Frontend/TextDiagnosticPrinter.cpp index 12c41d77ba467..7ae19645e40a4 100644 --- a/flang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/flang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -46,7 +46,7 @@ void TextDiagnosticPrinter::HandleDiagnostic( // We only emit diagnostics in contexts that lack valid source locations. 
assert(!info.getLocation().isValid() && - "Diagnostics with valid source location are not supported"); + "Diagnostics with valid source location are not supported"); Fortran::frontend::TextDiagnostic::printDiagnosticLevel(os, level, diagOpts->ShowColors); diff --git a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 54cbd2c99e4a0..b99d2b7196da3 100644 --- a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -82,7 +82,7 @@ createFrontendAction(CompilerInstance &ci) { return std::make_unique(); case PluginAction: { for (const FrontendPluginRegistry::entry &plugin : - FrontendPluginRegistry::entries()) { + FrontendPluginRegistry::entries()) { if (plugin.getName() == ci.getFrontendOpts().actionName) { std::unique_ptr p(plugin.instantiate()); return std::move(p); @@ -101,8 +101,9 @@ createFrontendAction(CompilerInstance &ci) { bool executeCompilerInvocation(CompilerInstance *flang) { // Honor -help. 
if (flang->getFrontendOpts().showHelp) { - clang::driver::getDriverOptTable().printHelp(llvm::outs(), - "flang-new -fc1 [options] file...", "LLVM 'Flang' Compiler", + clang::driver::getDriverOptTable().printHelp( + llvm::outs(), "flang-new -fc1 [options] file...", + "LLVM 'Flang' Compiler", /*Include=*/clang::driver::options::FC1Option, /*Exclude=*/llvm::opt::DriverFlag::HelpHidden, /*ShowAllAliases=*/false); diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp index 2c6b1d05bed40..7773125919710 100644 --- a/flang/lib/Optimizer/Builder/MutableBox.cpp +++ b/flang/lib/Optimizer/Builder/MutableBox.cpp @@ -809,28 +809,29 @@ fir::factory::MutableBoxReallocation fir::factory::genReallocIfNeeded( TODO(loc, "automatic allocation of derived type allocatable with " "length parameters"); } - auto ifOp = - builder - .genIfOp(loc, {addrType}, mustReallocate, - /*withElseRegion=*/true) - .genThen([&]() { - // If shape or length mismatch, allocate new storage. - // When rhs is a scalar, keep the previous shape - auto extents = shape.empty() - ? mlir::ValueRange(previousExtents) - : shape; - auto heap = allocateAndInitNewStorage( - builder, loc, box, extents, lengthParams, - ".auto.alloc"); - if (storageHandler) - storageHandler(getExtValForStorage(heap)); - builder.create(loc, heap); - }) - .genElse([&]() { - if (storageHandler) - storageHandler(getExtValForStorage(addr)); - builder.create(loc, addr); - }); + auto ifOp = builder + .genIfOp(loc, {addrType}, mustReallocate, + /*withElseRegion=*/true) + .genThen([&]() { + // If shape or length mismatch, allocate new + // storage. When rhs is a scalar, keep the + // previous shape + auto extents = + shape.empty() + ? 
mlir::ValueRange(previousExtents) + : shape; + auto heap = allocateAndInitNewStorage( + builder, loc, box, extents, lengthParams, + ".auto.alloc"); + if (storageHandler) + storageHandler(getExtValForStorage(heap)); + builder.create(loc, heap); + }) + .genElse([&]() { + if (storageHandler) + storageHandler(getExtValForStorage(addr)); + builder.create(loc, addr); + }); ifOp.end(); auto newAddr = ifOp.getResults()[0]; builder.create( diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp index 6868d32840a2e..9bf51cc6ee1a4 100644 --- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp @@ -841,17 +841,17 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase { auto argNo = newInTys.size(); if (attr.isByVal()) { if (auto align = attr.getAlignment()) - fixups.emplace_back( - FixupTy::Codes::ArgumentAsLoad, argNo, - [=](mlir::func::FuncOp func) { - auto elemType = fir::dyn_cast_ptrOrBoxEleTy( - func.getFunctionType().getInput(argNo)); - func.setArgAttr(argNo, "llvm.byval", - mlir::TypeAttr::get(elemType)); - func.setArgAttr(argNo, "llvm.align", - rewriter->getIntegerAttr( - rewriter->getIntegerType(32), align)); - }); + fixups.emplace_back(FixupTy::Codes::ArgumentAsLoad, argNo, + [=](mlir::func::FuncOp func) { + auto elemType = fir::dyn_cast_ptrOrBoxEleTy( + func.getFunctionType().getInput(argNo)); + func.setArgAttr(argNo, "llvm.byval", + mlir::TypeAttr::get(elemType)); + func.setArgAttr( + argNo, "llvm.align", + rewriter->getIntegerAttr( + rewriter->getIntegerType(32), align)); + }); else fixups.emplace_back(FixupTy::Codes::ArgumentAsLoad, newInTys.size(), [=](mlir::func::FuncOp func) { diff --git a/flang/lib/Semantics/check-acc-structure.h b/flang/lib/Semantics/check-acc-structure.h index d7de0c5b02eb4..fda626e57ba63 100644 --- a/flang/lib/Semantics/check-acc-structure.h +++ b/flang/lib/Semantics/check-acc-structure.h @@ -70,7 +70,6 @@ class AccStructureChecker 
#include "llvm/Frontend/OpenACC/ACC.inc" private: - bool CheckAllowedModifier(llvm::acc::Clause clause); bool IsComputeConstruct(llvm::acc::Directive directive) const; bool IsInsideComputeConstruct() const; diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index e0efaa7746a3e..85dbbb14e721a 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -1573,9 +1573,7 @@ void CheckHelper::CheckPassArg( return; } const auto &name{proc.name()}; - const Symbol *interface { - interface0 ? FindInterface(*interface0) : nullptr - }; + const Symbol *interface { interface0 ? FindInterface(*interface0) : nullptr }; if (!interface) { messages_.Say(name, "Procedure component '%s' must have NOPASS attribute or explicit interface"_err_en_US, diff --git a/flang/lib/Semantics/data-to-inits.h b/flang/lib/Semantics/data-to-inits.h index d39a9a39bcc44..10d850d23d5d6 100644 --- a/flang/lib/Semantics/data-to-inits.h +++ b/flang/lib/Semantics/data-to-inits.h @@ -18,7 +18,7 @@ namespace Fortran::parser { struct DataStmtSet; struct DataStmtValue; -} +} // namespace Fortran::parser namespace Fortran::evaluate { class ExpressionAnalyzer; } diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 3853b2eebc6b4..182e83eeea944 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2307,17 +2307,17 @@ auto ExpressionAnalyzer::GetCalleeAndArguments( const parser::ProcedureDesignator &pd, ActualArguments &&arguments, bool isSubroutine, bool mightBeStructureConstructor) -> std::optional { - return common::visit( - common::visitors{ - [&](const parser::Name &name) { - return GetCalleeAndArguments(name, std::move(arguments), - isSubroutine, mightBeStructureConstructor); - }, - [&](const parser::ProcComponentRef &pcr) { - return AnalyzeProcedureComponentRef( - pcr, std::move(arguments), isSubroutine); - }, - }, + return 
common::visit(common::visitors{ + [&](const parser::Name &name) { + return GetCalleeAndArguments(name, + std::move(arguments), isSubroutine, + mightBeStructureConstructor); + }, + [&](const parser::ProcComponentRef &pcr) { + return AnalyzeProcedureComponentRef( + pcr, std::move(arguments), isSubroutine); + }, + }, pd.u); } @@ -3417,26 +3417,26 @@ void ArgumentAnalyzer::Analyze( // be detected and represented (they're not expressions). // TODO: C1534: Don't allow a "restricted" specific intrinsic to be passed. std::optional actual; - common::visit( - common::visitors{ - [&](const common::Indirection &x) { - actual = AnalyzeExpr(x.value()); - SetArgSourceLocation(actual, x.value().source); - }, - [&](const parser::AltReturnSpec &label) { - if (!isSubroutine) { - context_.Say("alternate return specification may not appear on" - " function reference"_err_en_US); - } - actual = ActualArgument(label.v); - }, - [&](const parser::ActualArg::PercentRef &) { - context_.Say("%REF() intrinsic for arguments"_todo_en_US); - }, - [&](const parser::ActualArg::PercentVal &) { - context_.Say("%VAL() intrinsic for arguments"_todo_en_US); - }, - }, + common::visit(common::visitors{ + [&](const common::Indirection &x) { + actual = AnalyzeExpr(x.value()); + SetArgSourceLocation(actual, x.value().source); + }, + [&](const parser::AltReturnSpec &label) { + if (!isSubroutine) { + context_.Say( + "alternate return specification may not appear on" + " function reference"_err_en_US); + } + actual = ActualArgument(label.v); + }, + [&](const parser::ActualArg::PercentRef &) { + context_.Say("%REF() intrinsic for arguments"_todo_en_US); + }, + [&](const parser::ActualArg::PercentVal &) { + context_.Say("%VAL() intrinsic for arguments"_todo_en_US); + }, + }, std::get(arg.t).u); if (actual) { if (const auto &argKW{std::get>(arg.t)}) { diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 422c8735eab3a..3659ead0a568d 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ 
b/flang/lib/Semantics/mod-file.cpp @@ -687,7 +687,7 @@ void ModFileWriter::PutProcEntity(llvm::raw_ostream &os, const Symbol &symbol) { return; } const auto &details{symbol.get()}; - const ProcInterface &interface{details.interface()}; + const ProcInterface &interface { details.interface() }; Attrs attrs{symbol.attrs()}; if (details.passName()) { attrs.reset(Attr::PASS); diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index fe7942bbb7d79..4fe6ee4bd0076 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -715,7 +715,8 @@ bool GenericKind::Is(GenericKind::OtherKind x) const { return y && *y == x; } -bool SymbolOffsetCompare::operator()(const SymbolRef &x, const SymbolRef &y) const { +bool SymbolOffsetCompare::operator()( + const SymbolRef &x, const SymbolRef &y) const { const Symbol *xCommon{FindCommonBlockContaining(*x)}; const Symbol *yCommon{FindCommonBlockContaining(*y)}; if (xCommon) { diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 6bf2a574fe3fa..7484993d2393e 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -456,9 +456,7 @@ const Symbol *FindInterface(const Symbol &symbol) { return common::visit( common::visitors{ [](const ProcEntityDetails &details) { - const Symbol *interface { - details.interface().symbol() - }; + const Symbol *interface { details.interface().symbol() }; return interface ? 
FindInterface(*interface) : nullptr; }, [](const ProcBindingDetails &details) { diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp index 28a8db2584b5c..e4880b62ee857 100644 --- a/flang/tools/flang-driver/driver.cpp +++ b/flang/tools/flang-driver/driver.cpp @@ -72,8 +72,8 @@ static int executeFC1Tool(llvm::SmallVectorImpl &argV) { return 1; } -static void ExpandResponseFiles( - llvm::StringSaver &saver, llvm::SmallVectorImpl &args) { +static void ExpandResponseFiles(llvm::StringSaver &saver, + llvm::SmallVectorImpl &args) { // We're defaulting to the GNU syntax, since we don't have a CL mode. llvm::cl::TokenizerCallback tokenizer = &llvm::cl::TokenizeGNUCommandLine; llvm::cl::ExpansionContext ExpCtx(saver.getAllocator(), tokenizer); @@ -96,8 +96,8 @@ int main(int argc, const char **argv) { ExpandResponseFiles(saver, args); // Check if flang-new is in the frontend mode - auto firstArg = std::find_if( - args.begin() + 1, args.end(), [](const char *a) { return a != nullptr; }); + auto firstArg = std::find_if(args.begin() + 1, args.end(), + [](const char *a) { return a != nullptr; }); if (firstArg != args.end()) { if (llvm::StringRef(args[1]).startswith("-cc1")) { llvm::errs() << "error: unknown integrated tool '" << args[1] << "'. 
" @@ -127,7 +127,8 @@ int main(int argc, const char **argv) { // Prepare the driver clang::driver::Driver theDriver(driverPath, - llvm::sys::getDefaultTargetTriple(), diags, "flang LLVM compiler"); + llvm::sys::getDefaultTargetTriple(), diags, + "flang LLVM compiler"); theDriver.setTargetAndMode(targetandMode); std::unique_ptr c( theDriver.BuildCompilation(args)); diff --git a/flang/unittests/Optimizer/FIRContextTest.cpp b/flang/unittests/Optimizer/FIRContextTest.cpp index 5976f2c4979f3..7e1b97bbbd4f2 100644 --- a/flang/unittests/Optimizer/FIRContextTest.cpp +++ b/flang/unittests/Optimizer/FIRContextTest.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/Support/FIRContext.h" -#include "flang/Optimizer/Support/KindMapping.h" +#include "gtest/gtest.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinOps.h" +#include "flang/Optimizer/Support/KindMapping.h" #include "llvm/Support/Host.h" -#include "gtest/gtest.h" #include using namespace fir; diff --git a/flang/unittests/Runtime/Time.cpp b/flang/unittests/Runtime/Time.cpp index 479f82ffe524c..ceccb4a70805c 100644 --- a/flang/unittests/Runtime/Time.cpp +++ b/flang/unittests/Runtime/Time.cpp @@ -166,4 +166,3 @@ TEST(TimeIntrinsics, DateAndTime) { EXPECT_LE(minutes, 59); } } - From c060de72ec48386b5273f928d7098075bb5c4956 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 3 Nov 2022 17:41:41 +0100 Subject: [PATCH 165/516] GlobalISel: Fix artifact combine value finder look through copy Search for COPY source in instruction we get from look through (not copy dst). 
Differential Revision: https://reviews.llvm.org/D137273 --- .../GlobalISel/LegalizationArtifactCombiner.h | 5 +- .../artifact-combiner-build-vector.mir | 18 ++ .../AMDGPU/GlobalISel/extractelement.i128.ll | 84 ++----- .../AMDGPU/GlobalISel/insertelement.ll | 220 +++++++++--------- 4 files changed, 147 insertions(+), 180 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 3a53017a4e1c9..1921dcff4a60c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -724,7 +724,10 @@ class LegalizationArtifactCombiner { /// and its callees rely upon. Register findValueFromDefImpl(Register DefReg, unsigned StartBit, unsigned Size) { - MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI); + Optional DefSrcReg = + getDefSrcRegIgnoringCopies(DefReg, MRI); + MachineInstr *Def = DefSrcReg->MI; + DefReg = DefSrcReg->Reg; // If the instruction has a single def, then simply delegate the search. // For unmerge however with multiple defs, we need to compute the offset // into the source of the unmerge. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir index 87949ebef75a0..01e4162f0d503 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir @@ -252,3 +252,21 @@ body: | %7:_(<5 x s32>) = G_BUILD_VECTOR %3, %4, %5, %6, %2 $vgpr5_vgpr6_vgpr7_vgpr8_vgpr9= COPY %7 ... 
+ +--- +name: value_finder_look_through_copy +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX9-LABEL: name: value_finder_look_through_copy + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) + %3:_(s32) = COPY %1 + %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %2 + $vgpr2_vgpr3= COPY %4 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll index 5194410266813..093b40114d5ff 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -968,12 +968,8 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-NEXT: v_mov_b32_e32 v3, v7 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: extractelement_vgpr_v4i128_idx1: @@ -981,12 +977,8 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v4 -; GFX8-NEXT: v_mov_b32_e32 v1, v5 -; GFX8-NEXT: v_mov_b32_e32 v2, v6 -; GFX8-NEXT: v_mov_b32_e32 v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: 
extractelement_vgpr_v4i128_idx1: @@ -995,34 +987,24 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:16 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v4 -; GFX7-NEXT: v_mov_b32_e32 v1, v5 -; GFX7-NEXT: v_mov_b32_e32 v2, v6 -; GFX7-NEXT: v_mov_b32_e32 v3, v7 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, v4 -; GFX10-NEXT: v_mov_b32_e32 v1, v5 -; GFX10-NEXT: v_mov_b32_e32 v2, v6 -; GFX10-NEXT: v_mov_b32_e32 v3, v7 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 -; GFX11-NEXT: v_dual_mov_b32 v2, v6 :: v_dual_mov_b32 v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr %element = extractelement <4 x i128> %vector, i32 1 @@ -1033,12 +1015,8 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX9-NEXT: global_load_dwordx4 
v[0:3], v[0:1], off offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v8 -; GFX9-NEXT: v_mov_b32_e32 v1, v9 -; GFX9-NEXT: v_mov_b32_e32 v2, v10 -; GFX9-NEXT: v_mov_b32_e32 v3, v11 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: extractelement_vgpr_v4i128_idx2: @@ -1046,12 +1024,8 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v8 -; GFX8-NEXT: v_mov_b32_e32 v1, v9 -; GFX8-NEXT: v_mov_b32_e32 v2, v10 -; GFX8-NEXT: v_mov_b32_e32 v3, v11 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: extractelement_vgpr_v4i128_idx2: @@ -1060,34 +1034,24 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:32 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v8 -; GFX7-NEXT: v_mov_b32_e32 v1, v9 -; GFX7-NEXT: v_mov_b32_e32 v2, v10 -; GFX7-NEXT: v_mov_b32_e32 v3, v11 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, v8 -; GFX10-NEXT: v_mov_b32_e32 v1, v9 -; GFX10-NEXT: v_mov_b32_e32 v2, v10 -; GFX10-NEXT: v_mov_b32_e32 v3, v11 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: 
extractelement_vgpr_v4i128_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr %element = extractelement <4 x i128> %vector, i32 2 @@ -1098,12 +1062,8 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v12 -; GFX9-NEXT: v_mov_b32_e32 v1, v13 -; GFX9-NEXT: v_mov_b32_e32 v2, v14 -; GFX9-NEXT: v_mov_b32_e32 v3, v15 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: extractelement_vgpr_v4i128_idx3: @@ -1111,12 +1071,8 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v12 -; GFX8-NEXT: v_mov_b32_e32 v1, v13 -; GFX8-NEXT: v_mov_b32_e32 v2, v14 -; GFX8-NEXT: v_mov_b32_e32 v3, v15 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: extractelement_vgpr_v4i128_idx3: @@ -1125,34 +1081,24 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NEXT: 
buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:48 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v12 -; GFX7-NEXT: v_mov_b32_e32 v1, v13 -; GFX7-NEXT: v_mov_b32_e32 v2, v14 -; GFX7-NEXT: v_mov_b32_e32 v3, v15 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, v12 -; GFX10-NEXT: v_mov_b32_e32 v1, v13 -; GFX10-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-NEXT: v_mov_b32_e32 v3, v15 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13 -; GFX11-NEXT: v_dual_mov_b32 v2, v14 :: v_dual_mov_b32 v3, v15 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr %element = extractelement <4 x i128> %vector, i32 3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index d33155534c284..f4b821ca602c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -819,22 +819,22 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000 ; GPRIDX-NEXT: s_mov_b32 s8, s18 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0 -; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 -; GPRIDX-NEXT: 
v_mov_b32_e32 v5, s5 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 -; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 -; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 -; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 -; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 -; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s17 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s19 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[16:17], 0, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 2, v2 @@ -843,29 +843,29 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 5, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 6, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[14:15], 7, v2 -; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v4, v0, s[16:17] -; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v5, v1, s[16:17] -; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v1, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v8, v0, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v10, v0, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v12, v0, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v14, v0, s[10:11] -; GPRIDX-NEXT: 
v_cndmask_b32_e64 v14, v16, v0, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v18, v0, s[14:15] -; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v1, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v1, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v1, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v15, v1, s[10:11] -; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v17, v1, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v19, v1, s[14:15] -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[16:17] +; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[16:17] +; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[14:15] +; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[14:15] +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; @@ -1022,23 +1022,23 @@ define amdgpu_ps void 
@dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 -; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 -; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 -; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 -; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 -; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s18 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 @@ -1047,30 +1047,30 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 -; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v6, v1, s[0:1] -; 
GPRIDX-NEXT: v_cndmask_b32_e64 v6, v8, v1, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v10, v1, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v12, v1, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v14, v1, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v16, v1, s[10:11] -; GPRIDX-NEXT: v_mov_b32_e32 v16, s19 -; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v3, v16, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v16, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v16, s[0:1] -; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v16, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v16, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v16, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v15, v16, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v17, v16, s[10:11] -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GPRIDX-NEXT: v_mov_b32_e32 v0, s19 +; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v17, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v0, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v0, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v0, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v0, s[10:11] +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[1:4], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[5:8], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 
v[0:1], v[8:11], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[9:12], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[13:16], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_endpgm ; @@ -1444,22 +1444,22 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: v_mov_b32_e32 v19, s15 -; GPRIDX-NEXT: v_mov_b32_e32 v18, s14 -; GPRIDX-NEXT: v_mov_b32_e32 v17, s13 -; GPRIDX-NEXT: v_mov_b32_e32 v16, s12 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s11 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s10 -; GPRIDX-NEXT: v_mov_b32_e32 v13, s9 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s7 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s5 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s3 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s2 -; GPRIDX-NEXT: v_mov_b32_e32 v5, s1 -; GPRIDX-NEXT: v_mov_b32_e32 v4, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2 @@ -1468,29 +1468,29 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, 
v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v2 -; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v4, v0, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v5, v1, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v1, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v8, v0, s[0:1] -; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v10, v0, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v12, v0, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v14, v0, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v16, v0, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v18, v0, s[10:11] -; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v1, s[0:1] -; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v1, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v1, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v15, v1, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v17, v1, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v19, v1, s[10:11] -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[10:11] +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: 
global_store_dwordx4 v[0:1], v[6:9], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_endpgm ; From e2dd63340d8c998bbeead9fca580898f148529d3 Mon Sep 17 00:00:00 2001 From: Renaud-K Date: Wed, 2 Nov 2022 16:02:04 -0700 Subject: [PATCH 166/516] Fixing flang's definition of MLIR_MAIN_SRC_DIR --- flang/CMakeLists.txt | 4 ++-- flang/include/flang/Optimizer/Dialect/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index eff8560c65ba9..8f00c25bfc2fe 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -200,8 +200,8 @@ else() set(FLANG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) endif() - set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --src-root - set(MLIR_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --includedir + set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir ) # --src-root + set(MLIR_INCLUDE_DIR ${MLIR_MAIN_SRC_DIR}/include ) # --includedir set(MLIR_TABLEGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/tools/mlir/include) include_directories(SYSTEM ${MLIR_INCLUDE_DIR}) include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR}) diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt index 8f3d47cf6c8c6..d657e3f166903 100644 --- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt +++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt @@ -26,7 +26,7 @@ add_custom_target(flang-doc) set(dialect_doc_filename "FIRLangRef") set(LLVM_TARGET_DEFINITIONS FIROps.td) -tablegen(MLIR ${dialect_doc_filename}.md -gen-op-doc "-I${MLIR_MAIN_SRC_DIR}" 
"-I${MLIR_INCLUDE_DIR}") +tablegen(MLIR ${dialect_doc_filename}.md -gen-op-doc "-I${MLIR_INCLUDE_DIR}") set(GEN_DOC_FILE ${FLANG_BINARY_DIR}/docs/Dialect/${dialect_doc_filename}.md) add_custom_command( OUTPUT ${GEN_DOC_FILE} From 7e133eb49b35b1648de786f21f38db084f597b7f Mon Sep 17 00:00:00 2001 From: Emilio Cota Date: Thu, 3 Nov 2022 10:47:25 -0400 Subject: [PATCH 167/516] [mlir][bufferize] Add filterFn option to BufferResultsToOutParams This allows users to restrict the transformation to a subset of the functions in a module. For example, a user might want to apply the transformation to a module's entry point, but not to the calls in the module because those calls might refer to external C functions outside of their control. Reviewed By: springerm, nicolasvasilache Differential Revision: https://reviews.llvm.org/D137264 --- .../Dialect/Bufferization/Transforms/Passes.h | 17 +++++++- .../Transforms/BufferResultsToOutParams.cpp | 39 +++++++++++++++---- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index aa3f6423407c7..445430ac21a00 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -35,12 +35,25 @@ std::unique_ptr createBufferHoistingPass(); /// reallocations inside of loops. std::unique_ptr createBufferLoopHoistingPass(); +// Options struct for BufferResultsToOutParams pass. +// Note: defined only here, not in tablegen. +struct BufferResultsToOutParamsOptions { + // Filter function; returns true if the function should be converted. + // Defaults to true, i.e. all functions are converted. + llvm::function_ref filterFn = [](func::FuncOp *func) { + return true; + }; +}; + /// Creates a pass that converts memref function results to out-params. 
-std::unique_ptr createBufferResultsToOutParamsPass(); +std::unique_ptr createBufferResultsToOutParamsPass( + const BufferResultsToOutParamsOptions &options = {}); /// Replace buffers that are returned from a function with an out parameter. /// Also update all call sites. -LogicalResult promoteBufferResultsToOutParams(ModuleOp module); +LogicalResult +promoteBufferResultsToOutParams(ModuleOp module, + const BufferResultsToOutParamsOptions &options); /// Creates a pass that drops memref function results that are equivalent to a /// function argument. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp index 996e7b729c373..bff3b664ede55 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp @@ -119,9 +119,21 @@ static void updateReturnOps(func::FuncOp func, // Updates all CallOps in the scope of the given ModuleOp by allocating // temporary buffers for newly introduced out params. 
-static LogicalResult updateCalls(ModuleOp module) { +static LogicalResult +updateCalls(ModuleOp module, + const bufferization::BufferResultsToOutParamsOptions &options) { bool didFail = false; + SymbolTable symtab(module); module.walk([&](func::CallOp op) { + auto callee = symtab.lookup(op.getCallee()); + if (!callee) { + op.emitError() << "cannot find callee '" << op.getCallee() << "' in " + << "symbol table"; + didFail = true; + return; + } + if (!options.filterFn(&callee)) + return; SmallVector replaceWithNewCallResults; SmallVector replaceWithOutParams; for (OpResult result : op.getResults()) { @@ -169,9 +181,12 @@ static LogicalResult updateCalls(ModuleOp module) { return failure(didFail); } -LogicalResult -mlir::bufferization::promoteBufferResultsToOutParams(ModuleOp module) { +LogicalResult mlir::bufferization::promoteBufferResultsToOutParams( + ModuleOp module, + const bufferization::BufferResultsToOutParamsOptions &options) { for (auto func : module.getOps()) { + if (!options.filterFn(&func)) + continue; SmallVector appendedEntryArgs; if (failed(updateFuncOp(func, appendedEntryArgs))) return failure(); @@ -179,7 +194,7 @@ mlir::bufferization::promoteBufferResultsToOutParams(ModuleOp module) { continue; updateReturnOps(func, appendedEntryArgs); } - if (failed(updateCalls(module))) + if (failed(updateCalls(module, options))) return failure(); return success(); } @@ -188,14 +203,22 @@ namespace { struct BufferResultsToOutParamsPass : bufferization::impl::BufferResultsToOutParamsBase< BufferResultsToOutParamsPass> { + explicit BufferResultsToOutParamsPass( + const bufferization::BufferResultsToOutParamsOptions &options) + : options(options) {} + void runOnOperation() override { - if (failed(bufferization::promoteBufferResultsToOutParams(getOperation()))) + if (failed(bufferization::promoteBufferResultsToOutParams(getOperation(), + options))) return signalPassFailure(); } + +private: + bufferization::BufferResultsToOutParamsOptions options; }; } // namespace 
-std::unique_ptr -mlir::bufferization::createBufferResultsToOutParamsPass() { - return std::make_unique(); +std::unique_ptr mlir::bufferization::createBufferResultsToOutParamsPass( + const bufferization::BufferResultsToOutParamsOptions &options) { + return std::make_unique(options); } From fd2065b70f2a74503725ebadb39c5dd2f9aa15c9 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 3 Nov 2022 10:46:48 -0700 Subject: [PATCH 168/516] Have GetSupportedArchitectures report all supported arches PlatformDarwinKernel::GetSupportedArchitectures returns a list of architectures that are possible for this platform; it was using a compile-time check for the debug host to decide the list of arches that were valid. This was copied from a codepath doing native process debugging, and was clearly wrong for kernel debugging, but it had not happened to cause problems so it went unnoticed for a long time. Small NFC change to the logging messages of Target::SetArchitecture to make them a little more explicit about how the architecture is being modified/replaced. 
Differential Revision: https://reviews.llvm.org/D137301 rdar://101690111 --- .../Platform/MacOSX/PlatformDarwinKernel.cpp | 3 --- lldb/source/Target/Target.cpp | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp index 8ae211f102cb3..3c9cc8e77189e 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp @@ -986,11 +986,8 @@ bool PlatformDarwinKernel::LoadPlatformBinaryAndSetup(Process *process, std::vector PlatformDarwinKernel::GetSupportedArchitectures( const ArchSpec &process_host_arch) { std::vector result; -#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) ARMGetSupportedArchitectures(result); -#else x86GetSupportedArchitectures(result); -#endif return result; } diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index f1a311b7252cb..33a792b683ca4 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -1436,7 +1436,8 @@ void Target::SetExecutableModule(ModuleSP &executable_sp, if (!m_arch.GetSpec().IsValid()) { m_arch = executable_sp->GetArchitecture(); LLDB_LOG(log, - "setting architecture to {0} ({1}) based on executable file", + "Target::SetExecutableModule setting architecture to {0} ({1}) " + "based on executable file", m_arch.GetSpec().GetArchitectureName(), m_arch.GetSpec().GetTriple().getTriple()); } @@ -1536,7 +1537,9 @@ bool Target::SetArchitecture(const ArchSpec &arch_spec, bool set_platform, // specified if (replace_local_arch) m_arch = other; - LLDB_LOG(log, "set architecture to {0} ({1})", + LLDB_LOG(log, + "Target::SetArchitecture merging compatible arch; arch " + "is now {0} ({1})", m_arch.GetSpec().GetArchitectureName(), m_arch.GetSpec().GetTriple().getTriple()); return true; @@ -1544,9 +1547,13 @@ bool Target::SetArchitecture(const ArchSpec 
&arch_spec, bool set_platform, // If we have an executable file, try to reset the executable to the desired // architecture - LLDB_LOGF(log, "Target::SetArchitecture changing architecture to %s (%s)", - arch_spec.GetArchitectureName(), - arch_spec.GetTriple().getTriple().c_str()); + LLDB_LOGF( + log, + "Target::SetArchitecture changing architecture to %s (%s) from %s (%s)", + arch_spec.GetArchitectureName(), + arch_spec.GetTriple().getTriple().c_str(), + m_arch.GetSpec().GetArchitectureName(), + m_arch.GetSpec().GetTriple().getTriple().c_str()); m_arch = other; ModuleSP executable_sp = GetExecutableModule(); From 5d7fdf67f622507b852af980d516f49ff2d0e380 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Thu, 3 Nov 2022 10:45:18 -0700 Subject: [PATCH 169/516] [NFC][AArch64]Precommit test for D135102 - Run auto updater for 'trunc-to-tbl.ll' and 'build-pair-isel.ll' - Add the motivating test of D135102 to 'bitfield-insert.ll' Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D137296 --- llvm/test/CodeGen/AArch64/bitfield-insert.ll | 38 ++ llvm/test/CodeGen/AArch64/build-pair-isel.ll | 15 +- llvm/test/CodeGen/AArch64/trunc-to-tbl.ll | 480 +++++++++---------- 3 files changed, 292 insertions(+), 241 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll index a27e293ffe881..3bd320dc40b54 100644 --- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll +++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll @@ -601,3 +601,41 @@ define i64 @test_and_extended_shift_with_imm(i64 %0) { %3 = and i64 %2, 32640 ; #0x7f80 ret i64 %3 } + +; orr with left-shifted operand is better than bfi, since it improves data +; dependency, and orr has a smaller latency and higher throughput than bfm on +; some AArch64 processors (for the rest, orr is at least as good as bfm) +; +; ubfx x8, x0, #8, #7 +; and x9, x0, #0x7f +; orr x0, x9, x8, lsl #7 +define i64 @test_orr_not_bfxil_i64(i64 %0) { +; CHECK-LABEL: 
test_orr_not_bfxil_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr x8, x0, #1 +; CHECK-NEXT: and x8, x8, #0x3f80 +; CHECK-NEXT: bfxil x8, x0, #0, #7 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret + %2 = and i64 %0, 127 + %3 = lshr i64 %0, 1 + %4 = and i64 %3, 16256 ; 0x3f80 + %5 = or i64 %4, %2 + ret i64 %5 +} + +; The 32-bit test for `test_orr_not_bfxil_i64`. +define i32 @test_orr_not_bfxil_i32(i32 %0) { +; CHECK-LABEL: test_orr_not_bfxil_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #1 +; CHECK-NEXT: and w8, w8, #0x3f80 +; CHECK-NEXT: bfxil w8, w0, #0, #7 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %2 = and i32 %0, 127 + %3 = lshr i32 %0, 1 + %4 = and i32 %3, 16256 ; 0x3f80 + %5 = or i32 %4, %2 + ret i32 %5 +} diff --git a/llvm/test/CodeGen/AArch64/build-pair-isel.ll b/llvm/test/CodeGen/AArch64/build-pair-isel.ll index c9c5098017389..b9f03ed872cfd 100644 --- a/llvm/test/CodeGen/AArch64/build-pair-isel.ll +++ b/llvm/test/CodeGen/AArch64/build-pair-isel.ll @@ -1,10 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 -o - -O0 %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" ; This test checks we don't fail isel due to unhandled build_pair nodes. 
-; CHECK: bfi define void @compare_and_swap128() { +; CHECK-LABEL: compare_and_swap128: +; CHECK: // %bb.0: +; CHECK-NEXT: //APP +; CHECK-NEXT: nop +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: // implicit-def: $x9 +; CHECK-NEXT: mov w9, w10 +; CHECK-NEXT: mov w8, w8 +; CHECK-NEXT: // kill: def $x8 killed $w8 +; CHECK-NEXT: bfi x8, x9, #32, #32 +; CHECK-NEXT: // implicit-def: $x9 +; CHECK-NEXT: str x8, [x9] +; CHECK-NEXT: ret %1 = call i128 asm sideeffect "nop", "=r,~{memory}"() store i128 %1, i128* undef, align 16 ret void diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index 392bd243b7a4d..85f20ba18cc13 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -236,65 +236,65 @@ exit: } define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: LBB3_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x9, x0, x8, lsl #7 -; CHECK-NEXT: ldp q3, q2, [x9, #96] -; CHECK-NEXT: ldp q1, q0, [x9, #32] -; CHECK-NEXT: uzp1.4s v2, v3, v2 -; CHECK-NEXT: ldp q5, q4, [x9, #64] -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: ldp q3, q6, [x9] -; CHECK-NEXT: uzp1.4s v4, v5, v4 -; CHECK-NEXT: uzp1.8h v2, v4, v2 -; CHECK-NEXT: uzp1.4s v1, v3, v6 -; CHECK-NEXT: uzp1.8h v0, v1, v0 -; CHECK-NEXT: uzp1.16b v0, v0, v2 -; CHECK-NEXT: str q0, [x1, x8, lsl #4] -; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: b.eq LBB3_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov x8, xzr -; CHECK-BE-NEXT: .LBB3_1: // %loop -; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: add x9, x0, x8, lsl #7 -; CHECK-BE-NEXT: add x10, x9, #48 -; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: ld1 { v5.2d }, [x9] -; 
CHECK-BE-NEXT: ld1 { v0.2d }, [x10] -; CHECK-BE-NEXT: add x10, x9, #80 -; CHECK-BE-NEXT: ld1 { v1.2d }, [x11] -; CHECK-BE-NEXT: add x11, x9, #112 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] -; CHECK-BE-NEXT: add x10, x9, #96 -; CHECK-BE-NEXT: ld1 { v3.2d }, [x11] -; CHECK-BE-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] -; CHECK-BE-NEXT: add x10, x9, #64 -; CHECK-BE-NEXT: add x9, x9, #16 -; CHECK-BE-NEXT: ld1 { v6.2d }, [x10] -; CHECK-BE-NEXT: ld1 { v7.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 -; CHECK-BE-NEXT: uzp1 v3.4s, v4.4s, v3.4s -; CHECK-BE-NEXT: add x8, x8, #1 -; CHECK-BE-NEXT: cmp x8, #1000 -; CHECK-BE-NEXT: uzp1 v2.4s, v6.4s, v2.4s -; CHECK-BE-NEXT: uzp1 v1.4s, v5.4s, v7.4s -; CHECK-BE-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b -; CHECK-BE-NEXT: st1 { v0.16b }, [x9] -; CHECK-BE-NEXT: b.eq .LBB3_1 -; CHECK-BE-NEXT: // %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB3_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x9, x0, x8, lsl #7 +; CHECK-NEXT: ldp q3, q2, [x9, #96] +; CHECK-NEXT: ldp q1, q0, [x9, #32] +; CHECK-NEXT: uzp1.4s v2, v3, v2 +; CHECK-NEXT: ldp q5, q4, [x9, #64] +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: ldp q3, q6, [x9] +; CHECK-NEXT: uzp1.4s v4, v5, v4 +; CHECK-NEXT: uzp1.8h v2, v4, v2 +; CHECK-NEXT: uzp1.4s v1, v3, v6 +; CHECK-NEXT: uzp1.8h v0, v1, v0 +; CHECK-NEXT: uzp1.16b v0, v0, v2 +; CHECK-NEXT: str q0, [x1, x8, lsl #4] +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq LBB3_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB3_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, 
x0, x8, lsl #7 +; CHECK-BE-NEXT: add x10, x9, #48 +; CHECK-BE-NEXT: add x11, x9, #32 +; CHECK-BE-NEXT: ld1 { v5.2d }, [x9] +; CHECK-BE-NEXT: ld1 { v0.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #80 +; CHECK-BE-NEXT: ld1 { v1.2d }, [x11] +; CHECK-BE-NEXT: add x11, x9, #112 +; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #96 +; CHECK-BE-NEXT: ld1 { v3.2d }, [x11] +; CHECK-BE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #64 +; CHECK-BE-NEXT: add x9, x9, #16 +; CHECK-BE-NEXT: ld1 { v6.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v7.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 +; CHECK-BE-NEXT: uzp1 v3.4s, v4.4s, v3.4s +; CHECK-BE-NEXT: add x8, x8, #1 +; CHECK-BE-NEXT: cmp x8, #1000 +; CHECK-BE-NEXT: uzp1 v2.4s, v6.4s, v2.4s +; CHECK-BE-NEXT: uzp1 v1.4s, v5.4s, v7.4s +; CHECK-BE-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-BE-NEXT: st1 { v0.16b }, [x9] +; CHECK-BE-NEXT: b.eq .LBB3_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -315,49 +315,49 @@ exit: } define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: LBB4_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x9, x0, x8, lsl #6 -; CHECK-NEXT: ldp q1, q0, [x9, #32] -; CHECK-NEXT: ldp q3, q2, [x9] -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: uzp1.4s v1, v3, v2 -; CHECK-NEXT: uzp1.8h v0, v1, v0 -; CHECK-NEXT: xtn.8b v0, v0 -; CHECK-NEXT: str d0, [x1, x8, lsl #3] -; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: b.eq LBB4_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov x8, xzr -; CHECK-BE-NEXT: .LBB4_1: // %loop -; CHECK-BE-NEXT: // =>This 
Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: add x9, x0, x8, lsl #6 -; CHECK-BE-NEXT: add x10, x9, #48 -; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] -; CHECK-BE-NEXT: ld1 { v0.2d }, [x10] -; CHECK-BE-NEXT: add x10, x9, #32 -; CHECK-BE-NEXT: add x9, x9, #16 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] -; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 -; CHECK-BE-NEXT: add x8, x8, #1 -; CHECK-BE-NEXT: cmp x8, #1000 -; CHECK-BE-NEXT: uzp1 v0.4s, v2.4s, v0.4s -; CHECK-BE-NEXT: uzp1 v1.4s, v1.4s, v3.4s -; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; CHECK-BE-NEXT: xtn v0.8b, v0.8h -; CHECK-BE-NEXT: st1 { v0.8b }, [x9] -; CHECK-BE-NEXT: b.eq .LBB4_1 -; CHECK-BE-NEXT: // %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB4_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x9, x0, x8, lsl #6 +; CHECK-NEXT: ldp q1, q0, [x9, #32] +; CHECK-NEXT: ldp q3, q2, [x9] +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: uzp1.4s v1, v3, v2 +; CHECK-NEXT: uzp1.8h v0, v1, v0 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: str d0, [x1, x8, lsl #3] +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq LBB4_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB4_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8, lsl #6 +; CHECK-BE-NEXT: add x10, x9, #48 +; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] +; CHECK-BE-NEXT: ld1 { v0.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #32 +; CHECK-BE-NEXT: add x9, x9, #16 +; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 +; CHECK-BE-NEXT: add x8, x8, #1 +; CHECK-BE-NEXT: cmp x8, #1000 +; CHECK-BE-NEXT: uzp1 v0.4s, v2.4s, v0.4s +; CHECK-BE-NEXT: uzp1 
v1.4s, v1.4s, v3.4s +; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-BE-NEXT: xtn v0.8b, v0.8h +; CHECK-BE-NEXT: st1 { v0.8b }, [x9] +; CHECK-BE-NEXT: b.eq .LBB4_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -378,83 +378,83 @@ exit: } define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: LBB5_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp x10, x9, [x0] -; CHECK-NEXT: ldrb w11, [x0, #18] -; CHECK-NEXT: ldrh w13, [x0, #16] -; CHECK-NEXT: add x0, x0, #32 -; CHECK-NEXT: lsr x14, x10, #19 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: ubfx x12, x9, #12, #20 -; CHECK-NEXT: lsr x15, x9, #31 -; CHECK-NEXT: bfi w13, w11, #16, #8 -; CHECK-NEXT: lsr x11, x9, #50 -; CHECK-NEXT: mov.s v0[1], w14 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: lsr x12, x10, #38 -; CHECK-NEXT: bfi w11, w13, #14, #18 -; CHECK-NEXT: lsr x10, x10, #57 -; CHECK-NEXT: bfi w10, w9, #7, #25 -; CHECK-NEXT: lsr w9, w13, #5 -; CHECK-NEXT: mov.s v1[1], w15 -; CHECK-NEXT: mov.s v0[2], w12 -; CHECK-NEXT: mov.s v1[2], w11 -; CHECK-NEXT: mov.s v0[3], w10 -; CHECK-NEXT: mov.s v1[3], w9 -; CHECK-NEXT: uzp1.8h v0, v0, v1 -; CHECK-NEXT: xtn.8b v0, v0 -; CHECK-NEXT: str d0, [x1, x8, lsl #3] -; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: b.eq LBB5_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov x8, xzr -; CHECK-BE-NEXT: .LBB5_1: // %loop -; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ldp x10, x9, [x0] -; CHECK-BE-NEXT: ldrh w15, [x0, #16] -; CHECK-BE-NEXT: lsr x12, x10, #40 -; CHECK-BE-NEXT: lsr x13, x10, #45 -; CHECK-BE-NEXT: lsr x11, x9, #40 -; CHECK-BE-NEXT: ubfx x14, x9, #33, #7 -; CHECK-BE-NEXT: ubfx x16, x10, #26, #14 -; CHECK-BE-NEXT: bfi w16, w12, #14, #18 -; 
CHECK-BE-NEXT: ubfx x12, x9, #14, #18 -; CHECK-BE-NEXT: bfi w14, w11, #7, #24 -; CHECK-BE-NEXT: ldrb w11, [x0, #18] -; CHECK-BE-NEXT: fmov s0, w13 -; CHECK-BE-NEXT: add x0, x0, #32 -; CHECK-BE-NEXT: fmov s1, w14 -; CHECK-BE-NEXT: bfi w11, w15, #8, #16 -; CHECK-BE-NEXT: mov v0.s[1], w16 -; CHECK-BE-NEXT: mov v1.s[1], w12 -; CHECK-BE-NEXT: extr x12, x10, x9, #40 -; CHECK-BE-NEXT: lsl x9, x9, #24 -; CHECK-BE-NEXT: ubfx x10, x10, #7, #25 -; CHECK-BE-NEXT: orr w9, w11, w9 -; CHECK-BE-NEXT: lsr w9, w9, #19 -; CHECK-BE-NEXT: mov v0.s[2], w10 -; CHECK-BE-NEXT: ubfx x10, x12, #12, #20 -; CHECK-BE-NEXT: mov v1.s[2], w9 -; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 -; CHECK-BE-NEXT: add x8, x8, #1 -; CHECK-BE-NEXT: mov v0.s[3], w10 -; CHECK-BE-NEXT: cmp x8, #1000 -; CHECK-BE-NEXT: mov v1.s[3], w11 -; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-BE-NEXT: xtn v0.8b, v0.8h -; CHECK-BE-NEXT: st1 { v0.8b }, [x9] -; CHECK-BE-NEXT: b.eq .LBB5_1 -; CHECK-BE-NEXT: // %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB5_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldp x10, x9, [x0] +; CHECK-NEXT: ldrb w11, [x0, #18] +; CHECK-NEXT: ldrh w13, [x0, #16] +; CHECK-NEXT: add x0, x0, #32 +; CHECK-NEXT: lsr x14, x10, #19 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: ubfx x12, x9, #12, #20 +; CHECK-NEXT: lsr x15, x9, #31 +; CHECK-NEXT: bfi w13, w11, #16, #8 +; CHECK-NEXT: lsr x11, x9, #50 +; CHECK-NEXT: mov.s v0[1], w14 +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: lsr x12, x10, #38 +; CHECK-NEXT: bfi w11, w13, #14, #18 +; CHECK-NEXT: lsr x10, x10, #57 +; CHECK-NEXT: bfi w10, w9, #7, #25 +; CHECK-NEXT: lsr w9, w13, #5 +; CHECK-NEXT: mov.s v1[1], w15 +; CHECK-NEXT: mov.s v0[2], w12 +; CHECK-NEXT: mov.s v1[2], w11 +; CHECK-NEXT: mov.s v0[3], w10 +; CHECK-NEXT: mov.s v1[3], w9 +; CHECK-NEXT: uzp1.8h v0, v0, v1 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: str d0, [x1, x8, 
lsl #3] +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq LBB5_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB5_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldp x10, x9, [x0] +; CHECK-BE-NEXT: ldrh w15, [x0, #16] +; CHECK-BE-NEXT: lsr x12, x10, #40 +; CHECK-BE-NEXT: lsr x13, x10, #45 +; CHECK-BE-NEXT: lsr x11, x9, #40 +; CHECK-BE-NEXT: ubfx x14, x9, #33, #7 +; CHECK-BE-NEXT: ubfx x16, x10, #26, #14 +; CHECK-BE-NEXT: bfi w16, w12, #14, #18 +; CHECK-BE-NEXT: ubfx x12, x9, #14, #18 +; CHECK-BE-NEXT: bfi w14, w11, #7, #24 +; CHECK-BE-NEXT: ldrb w11, [x0, #18] +; CHECK-BE-NEXT: fmov s0, w13 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: fmov s1, w14 +; CHECK-BE-NEXT: bfi w11, w15, #8, #16 +; CHECK-BE-NEXT: mov v0.s[1], w16 +; CHECK-BE-NEXT: mov v1.s[1], w12 +; CHECK-BE-NEXT: extr x12, x10, x9, #40 +; CHECK-BE-NEXT: lsl x9, x9, #24 +; CHECK-BE-NEXT: ubfx x10, x10, #7, #25 +; CHECK-BE-NEXT: orr w9, w11, w9 +; CHECK-BE-NEXT: lsr w9, w9, #19 +; CHECK-BE-NEXT: mov v0.s[2], w10 +; CHECK-BE-NEXT: ubfx x10, x12, #12, #20 +; CHECK-BE-NEXT: mov v1.s[2], w9 +; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 +; CHECK-BE-NEXT: add x8, x8, #1 +; CHECK-BE-NEXT: mov v0.s[3], w10 +; CHECK-BE-NEXT: cmp x8, #1000 +; CHECK-BE-NEXT: mov v1.s[3], w11 +; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-BE-NEXT: xtn v0.8b, v0.8h +; CHECK-BE-NEXT: st1 { v0.8b }, [x9] +; CHECK-BE-NEXT: b.eq .LBB5_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -475,67 +475,67 @@ exit: } define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #1000 -; CHECK-NEXT: LBB6_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q1, q0, [x0, #32] -; 
CHECK-NEXT: add x9, x1, #8 -; CHECK-NEXT: add x10, x1, #10 -; CHECK-NEXT: subs x8, x8, #1 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: ldr d4, [x0, #80] -; CHECK-NEXT: ldr q1, [x0, #64] -; CHECK-NEXT: add x0, x0, #128 -; CHECK-NEXT: uzp1.4s v2, v3, v2 -; CHECK-NEXT: uzp1.4s v1, v1, v4 -; CHECK-NEXT: uzp1.8h v0, v2, v0 -; CHECK-NEXT: xtn.4h v1, v1 -; CHECK-NEXT: uzp1.16b v0, v0, v1 -; CHECK-NEXT: xtn.8b v1, v1 -; CHECK-NEXT: st1.b { v1 }[2], [x10] -; CHECK-NEXT: str d0, [x1], #16 -; CHECK-NEXT: st1.h { v0 }[4], [x9] -; CHECK-NEXT: b.eq LBB6_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov w8, #1000 -; CHECK-BE-NEXT:.LBB6_1: // %loop -; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: add x9, x0, #48 -; CHECK-BE-NEXT: add x10, x0, #32 -; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] -; CHECK-BE-NEXT: subs x8, x8, #1 -; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #16 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #64 -; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, #10 -; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] -; CHECK-BE-NEXT: add x10, x1, #8 -; CHECK-BE-NEXT: uzp1 v1.4s, v2.4s, v1.4s -; CHECK-BE-NEXT: ldr d2, [x0, #80] -; CHECK-BE-NEXT: add x0, x0, #128 -; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v3.4s -; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s -; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-BE-NEXT: xtn v1.4h, v2.4s -; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-BE-NEXT: xtn v1.8b, v1.8h -; CHECK-BE-NEXT: st1 { v1.b }[2], [x9] -; CHECK-BE-NEXT: rev64 v2.16b, v0.16b -; CHECK-BE-NEXT: rev16 v0.16b, v0.16b -; CHECK-BE-NEXT: str d2, [x1], #16 -; CHECK-BE-NEXT: st1 { v0.h }[4], [x10] -; CHECK-BE-NEXT: b.eq .LBB6_1 -; CHECK-BE-NEXT:// %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; 
CHECK-NEXT: mov w8, #1000 +; CHECK-NEXT: LBB6_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: add x9, x1, #8 +; CHECK-NEXT: add x10, x1, #10 +; CHECK-NEXT: subs x8, x8, #1 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: ldr d4, [x0, #80] +; CHECK-NEXT: ldr q1, [x0, #64] +; CHECK-NEXT: add x0, x0, #128 +; CHECK-NEXT: uzp1.4s v2, v3, v2 +; CHECK-NEXT: uzp1.4s v1, v1, v4 +; CHECK-NEXT: uzp1.8h v0, v2, v0 +; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: uzp1.16b v0, v0, v1 +; CHECK-NEXT: xtn.8b v1, v1 +; CHECK-NEXT: st1.b { v1 }[2], [x10] +; CHECK-NEXT: str d0, [x1], #16 +; CHECK-NEXT: st1.h { v0 }[4], [x9] +; CHECK-NEXT: b.eq LBB6_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov w8, #1000 +; CHECK-BE-NEXT: .LBB6_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, #48 +; CHECK-BE-NEXT: add x10, x0, #32 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: subs x8, x8, #1 +; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] +; CHECK-BE-NEXT: add x9, x0, #16 +; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] +; CHECK-BE-NEXT: add x10, x0, #64 +; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #10 +; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] +; CHECK-BE-NEXT: add x10, x1, #8 +; CHECK-BE-NEXT: uzp1 v1.4s, v2.4s, v1.4s +; CHECK-BE-NEXT: ldr d2, [x0, #80] +; CHECK-BE-NEXT: add x0, x0, #128 +; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v3.4s +; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s +; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-BE-NEXT: xtn v1.4h, v2.4s +; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-BE-NEXT: xtn v1.8b, v1.8h +; CHECK-BE-NEXT: st1 { v1.b }[2], [x9] +; CHECK-BE-NEXT: rev64 v2.16b, v0.16b +; CHECK-BE-NEXT: rev16 v0.16b, v0.16b +; CHECK-BE-NEXT: str d2, [x1], #16 +; CHECK-BE-NEXT: st1 { v0.h }[4], [x10] +; CHECK-BE-NEXT: 
b.eq .LBB6_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop From 74bace2dfe57d9cf569addf94af4e01a990d2374 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 3 Nov 2022 11:33:50 -0700 Subject: [PATCH 170/516] Revert "[AArch64] Improve codegen for shifted mask op" This reverts commit b4e1466c35d3ca3e04244e8e8b4ffaf0784d6d37. This causes a crash while building the Linux kernel. See the original Phabricator review for a reduced C and LLVM IR reproducer. --- .../Target/AArch64/AArch64ISelLowering.cpp | 20 +++++----------- llvm/test/CodeGen/AArch64/shift-logic.ll | 24 ------------------- 2 files changed, 6 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 17c489b2fb5ad..6571ddd7cb12b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14442,23 +14442,15 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, SDValue ShiftLHS = N->getOperand(0); EVT VT = N->getValueType(0); - // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not - // combine it with shift 'N' to let it be lowered to UBFX except: - // ((x >> C) & mask) << C. + // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine + // it with shift 'N' to let it be lowered to UBFX. 
if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && isa(ShiftLHS.getOperand(1))) { uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1); - if (isMask_64(TruncMask)) { - SDValue AndLHS = ShiftLHS.getOperand(0); - if (AndLHS.getOpcode() == ISD::SRL) { - if (auto *SRLC = dyn_cast(AndLHS.getOperand(1))) { - if (N->getOpcode() == ISD::SHL) - if (auto *SHLC = dyn_cast(N->getOperand(1))) - return SRLC->getAPIntValue() == SHLC->getAPIntValue(); - return false; - } - } - } + if (isMask_64(TruncMask) && + ShiftLHS.getOperand(0).getOpcode() == ISD::SRL && + isa(ShiftLHS.getOperand(0).getOperand(1))) + return false; } return true; } diff --git a/llvm/test/CodeGen/AArch64/shift-logic.ll b/llvm/test/CodeGen/AArch64/shift-logic.ll index 9a7cf004b3b74..af684bbb8aff7 100644 --- a/llvm/test/CodeGen/AArch64/shift-logic.ll +++ b/llvm/test/CodeGen/AArch64/shift-logic.ll @@ -151,27 +151,3 @@ define i32 @lshr_or_extra_use(i32 %x, i32 %y, i32* %p) nounwind { %sh1 = lshr i32 %r, 7 ret i32 %sh1 } - -define i64 @desirable_to_commute1(i64 %x) { -; CHECK-LABEL: desirable_to_commute1: -; CHECK: // %bb.0: -; CHECK-NEXT: and x0, x0, #0x7fff8 -; CHECK-NEXT: ret - %s1 = lshr i64 %x, 3 - %a = and i64 %s1, 65535 - %s2 = shl i64 %a, 3 - ret i64 %s2 -} - -define i64 @desirable_to_commute2(i64* %p, i64 %i) { -; CHECK-LABEL: desirable_to_commute2: -; CHECK: // %bb.0: -; CHECK-NEXT: and x8, x1, #0x1ff8 -; CHECK-NEXT: ldr x0, [x0, x8] -; CHECK-NEXT: ret - %lshr = lshr i64 %i, 3 - %and = and i64 %lshr, 1023 - %pidx = getelementptr i64, i64* %p, i64 %and - %r = load i64, i64* %pidx - ret i64 %r -} From b36d678b8e75ec8eb09d2b7e9513884f99d15f0a Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Thu, 3 Nov 2022 13:24:13 +0000 Subject: [PATCH 171/516] [flang]Fix build failure in tests After submitting the DataLayout fix, some tests fail when they didn't before. 
This has to do with the target essentially being ignored when these tests were run earlier, as the --target x86_64-unknown-linux-gnu only has to be correctly formed to be accepted. Now the target triple is actually being used to get the TargetMachine earlier - before MLIR is generated - so a test that names a valid target which is not available on the platform fails. Fix is to drop the explicit x86_64 target triple from those tests, so that they no longer depend on the x86 target being registered. Reviewed By: awarzynski Differential Revision: https://reviews.llvm.org/D137335 --- flang/test/Fir/boxchar.fir | 4 ++-- flang/test/Fir/inline.fir | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang/test/Fir/boxchar.fir b/flang/test/Fir/boxchar.fir index 06d66202ff894..4c5fdec2184ed 100644 --- a/flang/test/Fir/boxchar.fir +++ b/flang/test/Fir/boxchar.fir @@ -1,5 +1,5 @@ -// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s -// RUN: %flang_fc1 -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s +// RUN: tco %s | FileCheck %s +// RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck %s // Test of building and passing boxchar. diff --git a/flang/test/Fir/inline.fir b/flang/test/Fir/inline.fir index 48f10c1e85198..c2ed2bf422ead 100644 --- a/flang/test/Fir/inline.fir +++ b/flang/test/Fir/inline.fir @@ -1,5 +1,5 @@ -// RUN: tco --target=x86_64-unknown-linux-gnu --inline-all %s -o - | FileCheck %s -// RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -mmlir --inline-all -emit-llvm %s -o - | FileCheck %s +// RUN: tco --inline-all %s -o - | FileCheck %s +// RUN: %flang_fc1 -mmlir --inline-all -emit-llvm %s -o - | FileCheck %s // CHECK-LABEL: @add func.func @add(%a : i32, %b : i32) -> i32 { From 8086b0c8a883ea257519ff48d4445c8ff6a717a0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 3 Nov 2022 19:05:45 +0000 Subject: [PATCH 172/516] [ConstraintElim] Drop bail out for scalable vectors after using getTrue ConstantInt::getTrue/getFalse can materialize scalable vectors with all lanes true/false.
--- llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 6 ------ .../test/Transforms/ConstraintElimination/geps-ptrvector.ll | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 9c304d1f955c8..bd214e004a022 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -749,12 +749,6 @@ void State::addInfoFor(BasicBlock &BB) { static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n"); - // TODO: Implement splat of boolean value for scalable vectors. - if (isa(Cmp->getType())) { - LLVM_DEBUG(dbgs() << " skipping due to scalable vectors\n"); - return false; - } - CmpInst::Predicate Pred = Cmp->getPredicate(); Value *A = Cmp->getOperand(0); Value *B = Cmp->getOperand(1); diff --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll index 0f5a28b3c5671..93d940f2de3ba 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll @@ -40,7 +40,7 @@ define @test.scalable.vectorgep.ult.true( %v ; CHECK-LABEL: @test.scalable.vectorgep.ult.true( ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, [[VEC:%.*]], i64 1 ; CHECK-NEXT: [[T_1:%.*]] = icmp ult [[VEC]], [[GEP_1]] -; CHECK-NEXT: ret [[T_1]] +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) ; %gep.1 = getelementptr inbounds i32, %vec, i64 1 %t.1 = icmp ult %vec, %gep.1 @@ -51,7 +51,7 @@ define @test.scalable.vectorgep.ult.false( % ; CHECK-LABEL: @test.scalable.vectorgep.ult.false( ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, [[VEC:%.*]], i64 1 ; CHECK-NEXT: [[T_1:%.*]] = icmp ult [[GEP_1]], [[VEC]] -; CHECK-NEXT: ret [[T_1]] +; CHECK-NEXT: 
ret zeroinitializer ; %gep.1 = getelementptr inbounds i32, %vec, i64 1 %t.1 = icmp ult %gep.1, %vec From f62d8a1a5044df7b8d72033d056375b4ab256012 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Wed, 2 Nov 2022 16:28:49 -0700 Subject: [PATCH 173/516] [AArch64] Compare BFI and ORR with left-shifted operand for OR instruction selection. Before this patch: - For `r = or op0, op1`, `tryBitfieldInsertOpFromOr` combines it to BFI when 1) one of the two operands is bit-field-positioning or bit-field-extraction op; and 2) bits from the two operands don't overlap After this patch: - Right before OR is combined to BFI, evaluate whether ORR with left-shifted operand is better. A motivating example (https://godbolt.org/z/rnMrzs5vn, which is added as the test case `test_orr_not_bfxil_i64` in `CodeGen/AArch64/bitfield-insert.ll`) For IR: ``` define i64 @test_orr_not_bfxil(i64 %0) { %2 = and i64 %0, 127 %3 = lshr i64 %0, 1 %4 = and i64 %3, 16256 %5 = or i64 %4, %2 ret i64 %5 } ``` Before: ``` lsr x8, x0, #1 and x8, x8, #0x3f80 bfxil x8, x0, #0, #7 ``` After: ``` ubfx x8, x0, #8, #7 and x9, x0, #0x7f orr x0, x9, x8, lsl #7 ``` Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D135102 --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 124 ++++++++++++++++++ .../CodeGen/AArch64/arm64-bitfield-extract.ll | 10 +- .../CodeGen/AArch64/arm64-non-pow2-ldst.ll | 30 ++--- .../CodeGen/AArch64/arm64-strict-align.ll | 4 +- llvm/test/CodeGen/AArch64/arm64_32.ll | 5 +- llvm/test/CodeGen/AArch64/bfis-in-loop.ll | 8 +- llvm/test/CodeGen/AArch64/bitfield-insert.ll | 17 +-- llvm/test/CodeGen/AArch64/build-pair-isel.ll | 4 +- llvm/test/CodeGen/AArch64/funnel-shift-rot.ll | 3 +- .../AArch64/load-combine-big-endian.ll | 20 +-- llvm/test/CodeGen/AArch64/load-combine.ll | 24 ++-- llvm/test/CodeGen/AArch64/logic-shift.ll | 3 +- llvm/test/CodeGen/AArch64/nontemporal-load.ll | 40 +++--- llvm/test/CodeGen/AArch64/rotate-extract.ll | 4 +- llvm/test/CodeGen/AArch64/trunc-to-tbl.ll | 36 ++--
llvm/test/CodeGen/AArch64/urem-seteq.ll | 4 +- llvm/test/CodeGen/AArch64/vec_uaddo.ll | 13 +- llvm/test/CodeGen/AArch64/vec_umulo.ll | 13 +- 18 files changed, 241 insertions(+), 121 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 844f9c0c7159a..de44144dc25bc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2803,6 +2803,122 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { return true; } +static bool isWorthFoldingIntoOrrWithLeftShift(SDValue Dst, + SelectionDAG *CurDAG, + SDValue &LeftShiftedOperand, + uint64_t &LeftShiftAmount) { + // Avoid folding Dst into ORR-with-left-shift if Dst has other uses than ORR. + if (!Dst.hasOneUse()) + return false; + + EVT VT = Dst.getValueType(); + assert((VT == MVT::i32 || VT == MVT::i64) && + "Caller should guarantee that VT is one of i32 or i64"); + const unsigned SizeInBits = VT.getSizeInBits(); + + SDLoc DL(Dst.getNode()); + uint64_t AndImm, ShlImm; + if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && + isShiftedMask_64(AndImm)) { + // Avoid transforming 'DstOp0' if it has other uses than the AND node. + SDValue DstOp0 = Dst.getOperand(0); + if (!DstOp0.hasOneUse()) + return false; + + // An example to illustrate the transformation + // From: + // lsr x8, x1, #1 + // and x8, x8, #0x3f80 + // bfxil x8, x1, #0, #7 + // To: + // and x8, x23, #0x7f + // ubfx x9, x23, #8, #7 + // orr x23, x8, x9, lsl #7 + // + // The number of instructions remains the same, but ORR is faster than BFXIL + // on many AArch64 processors (or as good as BFXIL if not faster). Besides, + // the dependency chain is improved after the transformation. 
+ uint64_t SrlImm; + if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { + uint64_t NumTrailingZeroInShiftedMask = countTrailingZeros(AndImm); + if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { + unsigned MaskWidth = + countTrailingOnes(AndImm >> NumTrailingZeroInShiftedMask); + unsigned UBFMOpc = + (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; + SDNode *UBFMNode = CurDAG->getMachineNode( + UBFMOpc, DL, VT, DstOp0.getOperand(0), + CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, + VT), + CurDAG->getTargetConstant( + SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); + LeftShiftedOperand = SDValue(UBFMNode, 0); + LeftShiftAmount = NumTrailingZeroInShiftedMask; + return true; + } + } + } else if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { + LeftShiftedOperand = Dst.getOperand(0); + LeftShiftAmount = ShlImm; + return true; + } + // FIXME: Extend the implementation to optimize if Dst is an SRL node. + return false; +} + +static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, + SDValue Src, SDValue Dst, SelectionDAG *CurDAG, + const bool BiggerPattern) { + EVT VT = N->getValueType(0); + assert((VT == MVT::i32 || VT == MVT::i64) && + "Expect result type to be i32 or i64 since N is combinable to BFM"); + SDLoc DL(N); + + // Bail out if BFM simplifies away one node in BFM Dst. + if (OrOpd1 != Dst) + return false; + + // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer + // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. + if (BiggerPattern) { + uint64_t SrcAndImm; + if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && + isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { + // OrOpd0 = AND Src, #Mask + // So BFM simplifies away one AND node from Src and doesn't simplify away + // nodes from Dst. 
If ORR with left-shifted operand also simplifies away + // one node (from Rd), ORR is better since it has higher throughput and + // smaller latency than BFM on many AArch64 processors (and for the rest + // ORR is at least as good as BFM). + SDValue LeftShiftedOperand; + uint64_t LeftShiftAmount; + if (isWorthFoldingIntoOrrWithLeftShift(Dst, CurDAG, LeftShiftedOperand, + LeftShiftAmount)) { + unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; + SDValue Ops[] = {OrOpd0, LeftShiftedOperand, + CurDAG->getTargetConstant(LeftShiftAmount, DL, VT)}; + CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); + return true; + } + } + return false; + } + + assert((!BiggerPattern) && "BiggerPattern should be handled above"); + + uint64_t ShlImm; + // FIXME: Extend the implementation if OrOpd0 is an SRL node. + if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm) && + OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { + unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; + SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ShlImm, DL, VT)}; + CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); + return true; + } + + return false; +} + static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG) { assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); @@ -2905,6 +3021,14 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, // or is useful because it discards more bits Dst = OrOpd1Val; + // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR + // with left-shifted operand is more efficient. + // FIXME: Extend this to compare AArch64::BFM and AArch64::ORR with + // right-shifted operand as well. 
+ if (tryOrrWithLeftShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, + BiggerPattern)) + return true; + // both parts match SDLoc DL(N); SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll index 86ef69ff4e936..cf72e4b1fce9b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll +++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll @@ -964,9 +964,9 @@ entry: define i16 @test_ignored_rightbits(i32 %dst, i32 %in) { ; LLC-LABEL: test_ignored_rightbits: ; LLC: // %bb.0: -; LLC-NEXT: and w0, w0, #0x7 -; LLC-NEXT: bfi w0, w1, #3, #4 -; LLC-NEXT: bfi w0, w0, #8, #7 +; LLC-NEXT: and w8, w0, #0x7 +; LLC-NEXT: bfi w8, w1, #3, #4 +; LLC-NEXT: orr w0, w8, w8, lsl #8 ; LLC-NEXT: ret ; OPT-LABEL: @test_ignored_rightbits( ; OPT-NEXT: [[POSITIONED_FIELD:%.*]] = shl i32 [[IN:%.*]], 3 @@ -1000,8 +1000,8 @@ define void @sameOperandBFI(i64 %src, i64 %src2, i16 *%ptr) { ; LLC-NEXT: lsr x8, x0, #47 ; LLC-NEXT: and w9, w1, #0x3 ; LLC-NEXT: bfi w9, w8, #2, #2 -; LLC-NEXT: bfi w9, w9, #4, #4 -; LLC-NEXT: strh w9, [x2] +; LLC-NEXT: orr w8, w9, w9, lsl #4 +; LLC-NEXT: strh w8, [x2] ; LLC-NEXT: .LBB30_2: // %end ; LLC-NEXT: ret ; OPT-LABEL: @sameOperandBFI( diff --git a/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll index eca81e58004cb..5a44550cc172a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll @@ -5,8 +5,8 @@ define i24 @ldi24(ptr %p) nounwind { ; CHECK-LABEL: ldi24: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: ldrh w0, [x0] -; CHECK-NEXT: bfi w0, w8, #16, #16 +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: orr w0, w9, w8, lsl #16 ; CHECK-NEXT: ret %r = load i24, i24* %p ret i24 %r @@ -17,9 +17,9 @@ define i56 @ldi56(ptr %p) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #6] ; CHECK-NEXT: ldrh 
w9, [x0, #4] -; CHECK-NEXT: ldr w0, [x0] -; CHECK-NEXT: bfi w9, w8, #16, #16 -; CHECK-NEXT: bfi x0, x9, #32, #32 +; CHECK-NEXT: ldr w10, [x0] +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: orr x0, x10, x8, lsl #32 ; CHECK-NEXT: ret %r = load i56, i56* %p ret i56 %r @@ -41,10 +41,10 @@ define i120 @ldi120(ptr %p) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #14] ; CHECK-NEXT: ldrh w9, [x0, #12] -; CHECK-NEXT: ldr w1, [x0, #8] +; CHECK-NEXT: ldr w10, [x0, #8] ; CHECK-NEXT: ldr x0, [x0] -; CHECK-NEXT: bfi w9, w8, #16, #16 -; CHECK-NEXT: bfi x1, x9, #32, #32 +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: orr x1, x10, x8, lsl #32 ; CHECK-NEXT: ret %r = load i120, i120* %p ret i120 %r @@ -55,10 +55,10 @@ define i280 @ldi280(ptr %p) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp x8, x1, [x0] ; CHECK-NEXT: ldrb w9, [x0, #34] -; CHECK-NEXT: ldrh w4, [x0, #32] +; CHECK-NEXT: ldrh w10, [x0, #32] ; CHECK-NEXT: ldp x2, x3, [x0, #16] ; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: bfi x4, x9, #16, #8 +; CHECK-NEXT: orr x4, x10, x9, lsl #16 ; CHECK-NEXT: ret %r = load i280, i280* %p ret i280 %r @@ -133,7 +133,7 @@ define void @i56_or(ptr %a) { ; CHECK-NEXT: ldrh w10, [x8, #4]! ; CHECK-NEXT: ldrb w11, [x8, #2] ; CHECK-NEXT: orr w9, w9, #0x180 -; CHECK-NEXT: bfi w10, w11, #16, #16 +; CHECK-NEXT: orr w10, w10, w11, lsl #16 ; CHECK-NEXT: str w9, [x0] ; CHECK-NEXT: strb w11, [x8, #2] ; CHECK-NEXT: strh w10, [x8] @@ -153,7 +153,7 @@ define void @i56_and_or(ptr %a) { ; CHECK-NEXT: ldrb w11, [x8, #2] ; CHECK-NEXT: orr w9, w9, #0x180 ; CHECK-NEXT: and w9, w9, #0xffffff80 -; CHECK-NEXT: bfi w10, w11, #16, #16 +; CHECK-NEXT: orr w10, w10, w11, lsl #16 ; CHECK-NEXT: strb w11, [x8, #2] ; CHECK-NEXT: str w9, [x0] ; CHECK-NEXT: strh w10, [x8] @@ -172,11 +172,11 @@ define void @i56_insert_bit(ptr %a, i1 zeroext %bit) { ; CHECK-NEXT: ldr w11, [x0] ; CHECK-NEXT: ldrh w9, [x8, #4]! 
; CHECK-NEXT: ldrb w10, [x8, #2] -; CHECK-NEXT: bfi w9, w10, #16, #8 +; CHECK-NEXT: orr w9, w9, w10, lsl #16 ; CHECK-NEXT: strb w10, [x8, #2] -; CHECK-NEXT: bfi x11, x9, #32, #24 -; CHECK-NEXT: strh w9, [x8] +; CHECK-NEXT: orr x11, x11, x9, lsl #32 ; CHECK-NEXT: and x11, x11, #0xffffffffffffdfff +; CHECK-NEXT: strh w9, [x8] ; CHECK-NEXT: orr w11, w11, w1, lsl #13 ; CHECK-NEXT: str w11, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-strict-align.ll b/llvm/test/CodeGen/AArch64/arm64-strict-align.ll index 28c158f7a2eb0..a7450349766fe 100644 --- a/llvm/test/CodeGen/AArch64/arm64-strict-align.ll +++ b/llvm/test/CodeGen/AArch64/arm64-strict-align.ll @@ -5,7 +5,7 @@ define i32 @f0(i32* nocapture %p) nounwind { ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2] ; CHECK-STRICT: ldrh [[LOW:w[0-9]+]], [x0] -; CHECK-STRICT: bfi [[LOW]], [[HIGH]], #16, #16 +; CHECK-STRICT: orr w0, [[LOW]], [[HIGH]], lsl #16 ; CHECK-STRICT: ret ; CHECK: ldr w0, [x0] @@ -16,7 +16,7 @@ define i32 @f0(i32* nocapture %p) nounwind { define i64 @f1(i64* nocapture %p) nounwind { ; CHECK-STRICT: ldp w[[LOW:[0-9]+]], w[[HIGH:[0-9]+]], [x0] -; CHECK-STRICT: bfi x[[LOW]], x[[HIGH]], #32, #32 +; CHECK-STRICT: orr x0, x[[LOW]], x[[HIGH]], lsl #32 ; CHECK-STRICT: ret ; CHECK: ldr x0, [x0] diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll index fbf12e80b6b53..0eb5b637b08f9 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32.ll @@ -662,8 +662,9 @@ define void @test_struct_hi(i32 %hi) nounwind { ; CHECK-LABEL: test_struct_hi: ; CHECK: mov w[[IN:[0-9]+]], w0 ; CHECK: bl _get_int -; CHECK-FAST-NEXT: mov w0, w0 -; CHECK-NEXT: bfi x0, x[[IN]], #32, #32 +; CHECK-FAST-NEXT: mov w[[DST:[0-9]+]], w0 +; CHECK-FAST-NEXT: orr x0, x[[DST]], x[[IN]], lsl #32 +; CHECK-OPT-NEXT: bfi x0, x[[IN]], #32, #32 ; CHECK-NEXT: bl _take_pair %val.64 = call i64 @get_int() %val.32 = trunc i64 %val.64 to i32 diff --git 
a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll index 6ee2feb2c2176..5207f2ba32d36 100644 --- a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll +++ b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll @@ -28,8 +28,8 @@ define i64 @bfis_in_loop_zero() { ; CHECK-NEXT: ldr x11, [x9, #8] ; CHECK-NEXT: and x9, x10, #0xff ; CHECK-NEXT: and x10, x0, #0xffffffff00000000 -; CHECK-NEXT: bfi x9, x8, #8, #32 -; CHECK-NEXT: bfi x10, x12, #16, #1 +; CHECK-NEXT: orr x9, x9, x8, lsl #8 +; CHECK-NEXT: orr x10, x10, x12, lsl #16 ; CHECK-NEXT: orr x0, x10, x9 ; CHECK-NEXT: ldr x9, [x11, #16] ; CHECK-NEXT: cbnz x11, .LBB0_1 @@ -97,8 +97,8 @@ define i64 @bfis_in_loop_undef() { ; CHECK-NEXT: ldr x11, [x9, #8] ; CHECK-NEXT: and x9, x10, #0xff ; CHECK-NEXT: and x10, x0, #0xffffffff00000000 -; CHECK-NEXT: bfi x9, x8, #8, #32 -; CHECK-NEXT: bfi x10, x12, #16, #1 +; CHECK-NEXT: orr x9, x9, x8, lsl #8 +; CHECK-NEXT: orr x10, x10, x12, lsl #16 ; CHECK-NEXT: orr x0, x10, x9 ; CHECK-NEXT: ldr x9, [x11, #16] ; CHECK-NEXT: cbnz x11, .LBB1_1 diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll index 3bd320dc40b54..b8e69d5cfaafe 100644 --- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll +++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll @@ -269,8 +269,7 @@ define i32 @test_nouseful_bits(i8 %a, i32 %b) { ; CHECK-NEXT: lsl w8, w8, #8 ; CHECK-NEXT: mov w9, w8 ; CHECK-NEXT: bfxil w9, w0, #0, #8 -; CHECK-NEXT: bfi w8, w9, #16, #16 -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %conv = zext i8 %a to i32 ; 0 0 0 A %shl = shl i32 %b, 8 ; B2 B1 B0 0 @@ -612,10 +611,9 @@ define i64 @test_and_extended_shift_with_imm(i64 %0) { define i64 @test_orr_not_bfxil_i64(i64 %0) { ; CHECK-LABEL: test_orr_not_bfxil_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x0, #1 -; CHECK-NEXT: and x8, x8, #0x3f80 -; CHECK-NEXT: bfxil x8, x0, #0, #7 -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ubfx x8, x0, #8, #7 +; CHECK-NEXT: 
and x9, x0, #0x7f +; CHECK-NEXT: orr x0, x9, x8, lsl #7 ; CHECK-NEXT: ret %2 = and i64 %0, 127 %3 = lshr i64 %0, 1 @@ -628,10 +626,9 @@ define i64 @test_orr_not_bfxil_i64(i64 %0) { define i32 @test_orr_not_bfxil_i32(i32 %0) { ; CHECK-LABEL: test_orr_not_bfxil_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, #1 -; CHECK-NEXT: and w8, w8, #0x3f80 -; CHECK-NEXT: bfxil w8, w0, #0, #7 -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ubfx w8, w0, #8, #7 +; CHECK-NEXT: and w9, w0, #0x7f +; CHECK-NEXT: orr w0, w9, w8, lsl #7 ; CHECK-NEXT: ret %2 = and i32 %0, 127 %3 = lshr i32 %0, 1 diff --git a/llvm/test/CodeGen/AArch64/build-pair-isel.ll b/llvm/test/CodeGen/AArch64/build-pair-isel.ll index b9f03ed872cfd..970a2c69343f5 100644 --- a/llvm/test/CodeGen/AArch64/build-pair-isel.ll +++ b/llvm/test/CodeGen/AArch64/build-pair-isel.ll @@ -14,7 +14,7 @@ define void @compare_and_swap128() { ; CHECK-NEXT: mov w9, w10 ; CHECK-NEXT: mov w8, w8 ; CHECK-NEXT: // kill: def $x8 killed $w8 -; CHECK-NEXT: bfi x8, x9, #32, #32 +; CHECK-NEXT: orr x8, x8, x9, lsl #32 ; CHECK-NEXT: // implicit-def: $x9 ; CHECK-NEXT: str x8, [x9] ; CHECK-NEXT: ret @@ -22,5 +22,3 @@ define void @compare_and_swap128() { store i128 %1, i128* undef, align 16 ret void } - - diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll index bb37cc81a7ab1..c4481871dec49 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll @@ -19,8 +19,7 @@ define i8 @rotl_i8_const_shift(i8 %x) { ; CHECK-LABEL: rotl_i8_const_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ubfx w8, w0, #5, #3 -; CHECK-NEXT: bfi w8, w0, #3, #29 -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: orr w0, w8, w0, lsl #3 ; CHECK-NEXT: ret %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) ret i8 %f diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll index 43e04e341b7e1..bff4f2113df3a 100644 --- 
a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll +++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll @@ -463,8 +463,8 @@ define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 @@ -486,8 +486,8 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 @@ -527,8 +527,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 @@ -550,8 +550,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 @@ -576,8 +576,8 @@ define i16 @load_i16_from_nonzero_offset(i8* %p) { ; CHECK-LABEL: load_i16_from_nonzero_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrb w0, [x0, #2] -; CHECK-NEXT: bfi w0, w8, #8, #24 +; CHECK-NEXT: ldrb w9, [x0, #2] +; CHECK-NEXT: orr w0, w9, w8, lsl #8 ; CHECK-NEXT: ret %p1.i16 
= bitcast i8* %p to i16* %p2.i8 = getelementptr i8, i8* %p, i64 2 diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll index 293967bcec75c..de1b0f13adf0a 100644 --- a/llvm/test/CodeGen/AArch64/load-combine.ll +++ b/llvm/test/CodeGen/AArch64/load-combine.ll @@ -453,8 +453,8 @@ define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -477,8 +477,8 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -521,8 +521,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -545,8 +545,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -603,7 +603,7 @@ define void @short_vector_to_i32_unused_low_i8(<4 x i8>* %in, i32* %out, i32* %p ; CHECK-NEXT: umov w10, v0.h[3] ; CHECK-NEXT: lsl w8, w8, #16 ; CHECK-NEXT: bfi w8, w9, #8, #8 -; CHECK-NEXT: bfi w8, w10, #24, #8 +; CHECK-NEXT: orr w8, w8, w10, lsl #24 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, <4 x 
i8>* %in, align 4 @@ -634,8 +634,8 @@ define void @short_vector_to_i32_unused_high_i8(<4 x i8>* %in, i32* %out, i32* % ; CHECK-NEXT: ldrh w9, [x0] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: bfi w9, w8, #16, #8 -; CHECK-NEXT: str w9, [x1] +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, <4 x i8>* %in, align 4 @@ -665,7 +665,7 @@ define void @short_vector_to_i32_unused_low_i16(<4 x i8>* %in, i32* %out, i32* % ; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: lsl w8, w8, #24 -; CHECK-NEXT: bfi w8, w9, #16, #8 +; CHECK-NEXT: orr w8, w8, w9, lsl #16 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, <4 x i8>* %in, align 4 diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll index 12c3e18317f88..ba63c4433a2a3 100644 --- a/llvm/test/CodeGen/AArch64/logic-shift.ll +++ b/llvm/test/CodeGen/AArch64/logic-shift.ll @@ -818,8 +818,7 @@ define i32 @or_fshr_wrong_shift(i32 %x, i32 %y) { ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, w1 ; CHECK-NEXT: lsr w8, w8, #26 -; CHECK-NEXT: bfi w8, w0, #7, #25 -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: orr w0, w8, w0, lsl #7 ; CHECK-NEXT: ret %or1 = or i32 %x, %y %sh1 = shl i32 %x, 7 diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll index 288ba22e79289..f8ff50b6e4c54 100644 --- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll @@ -490,27 +490,27 @@ define <4 x i65> @test_ldnp_v4i65(<4 x i65>* %A) { ; ; CHECK-BE-LABEL: test_ldnp_v4i65: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldp x9, x8, [x0, #16] -; CHECK-BE-NEXT: ldp x11, x10, [x0] -; CHECK-BE-NEXT: ldrb w7, [x0, #32] -; CHECK-BE-NEXT: lsr x13, x9, #56 -; CHECK-BE-NEXT: lsr x14, x11, #56 -; CHECK-BE-NEXT: extr x15, x10, x9, #56 -; CHECK-BE-NEXT: bfi x7, x8, #8, #56 -; CHECK-BE-NEXT: extr x8, x9, x8, #56 -; 
CHECK-BE-NEXT: extr x12, x11, x10, #56 -; CHECK-BE-NEXT: lsr x11, x11, #59 -; CHECK-BE-NEXT: ubfx x9, x9, #57, #1 +; CHECK-BE-NEXT: ldp x10, x9, [x0, #16] +; CHECK-BE-NEXT: ldp x12, x11, [x0] +; CHECK-BE-NEXT: ldrb w8, [x0, #32] +; CHECK-BE-NEXT: lsr x13, x10, #56 +; CHECK-BE-NEXT: lsr x14, x12, #56 +; CHECK-BE-NEXT: extr x15, x11, x10, #56 +; CHECK-BE-NEXT: orr x7, x8, x9, lsl #8 +; CHECK-BE-NEXT: extr x8, x10, x9, #56 +; CHECK-BE-NEXT: extr x9, x12, x11, #56 +; CHECK-BE-NEXT: lsr x12, x12, #59 +; CHECK-BE-NEXT: ubfx x10, x10, #57, #1 ; CHECK-BE-NEXT: extr x5, x13, x8, #1 -; CHECK-BE-NEXT: extr x1, x14, x12, #3 -; CHECK-BE-NEXT: ubfx x12, x10, #58, #1 -; CHECK-BE-NEXT: fmov d0, x11 -; CHECK-BE-NEXT: and x11, x8, #0x1 -; CHECK-BE-NEXT: lsr x10, x10, #56 -; CHECK-BE-NEXT: fmov d2, x9 -; CHECK-BE-NEXT: fmov d1, x12 -; CHECK-BE-NEXT: extr x3, x10, x15, #2 -; CHECK-BE-NEXT: fmov d3, x11 +; CHECK-BE-NEXT: extr x1, x14, x9, #3 +; CHECK-BE-NEXT: ubfx x9, x11, #58, #1 +; CHECK-BE-NEXT: fmov d0, x12 +; CHECK-BE-NEXT: and x12, x8, #0x1 +; CHECK-BE-NEXT: lsr x11, x11, #56 +; CHECK-BE-NEXT: fmov d2, x10 +; CHECK-BE-NEXT: fmov d1, x9 +; CHECK-BE-NEXT: extr x3, x11, x15, #2 +; CHECK-BE-NEXT: fmov d3, x12 ; CHECK-BE-NEXT: mov v0.d[1], x1 ; CHECK-BE-NEXT: mov v2.d[1], x5 ; CHECK-BE-NEXT: mov v1.d[1], x3 diff --git a/llvm/test/CodeGen/AArch64/rotate-extract.ll b/llvm/test/CodeGen/AArch64/rotate-extract.ll index 9a1c6a965bf7f..20008c41c42e8 100644 --- a/llvm/test/CodeGen/AArch64/rotate-extract.ll +++ b/llvm/test/CodeGen/AArch64/rotate-extract.ll @@ -113,8 +113,8 @@ define i64 @no_extract_mul(i64 %i) nounwind { ; CHECK-LABEL: no_extract_mul: ; CHECK: // %bb.0: ; CHECK-NEXT: add x8, x0, x0, lsl #3 -; CHECK-NEXT: lsr x0, x8, #57 -; CHECK-NEXT: bfi x0, x8, #8, #56 +; CHECK-NEXT: lsr x9, x8, #57 +; CHECK-NEXT: orr x0, x9, x8, lsl #8 ; CHECK-NEXT: ret %lhs_mul = mul i64 %i, 2304 %rhs_mul = mul i64 %i, 9 diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll 
b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index 85f20ba18cc13..7b5041fc58cc9 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -391,20 +391,20 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: ubfx x12, x9, #12, #20 ; CHECK-NEXT: lsr x15, x9, #31 -; CHECK-NEXT: bfi w13, w11, #16, #8 -; CHECK-NEXT: lsr x11, x9, #50 +; CHECK-NEXT: orr w11, w13, w11, lsl #16 +; CHECK-NEXT: lsr x13, x9, #50 ; CHECK-NEXT: mov.s v0[1], w14 ; CHECK-NEXT: fmov s1, w12 ; CHECK-NEXT: lsr x12, x10, #38 -; CHECK-NEXT: bfi w11, w13, #14, #18 +; CHECK-NEXT: orr w13, w13, w11, lsl #14 ; CHECK-NEXT: lsr x10, x10, #57 -; CHECK-NEXT: bfi w10, w9, #7, #25 -; CHECK-NEXT: lsr w9, w13, #5 +; CHECK-NEXT: orr w9, w10, w9, lsl #7 +; CHECK-NEXT: lsr w10, w11, #5 ; CHECK-NEXT: mov.s v1[1], w15 ; CHECK-NEXT: mov.s v0[2], w12 -; CHECK-NEXT: mov.s v1[2], w11 -; CHECK-NEXT: mov.s v0[3], w10 -; CHECK-NEXT: mov.s v1[3], w9 +; CHECK-NEXT: mov.s v1[2], w13 +; CHECK-NEXT: mov.s v0[3], w9 +; CHECK-NEXT: mov.s v1[3], w10 ; CHECK-NEXT: uzp1.8h v0, v0, v1 ; CHECK-NEXT: xtn.8b v0, v0 ; CHECK-NEXT: str d0, [x1, x8, lsl #3] @@ -420,21 +420,21 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: .LBB5_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: ldp x10, x9, [x0] -; CHECK-BE-NEXT: ldrh w15, [x0, #16] -; CHECK-BE-NEXT: lsr x12, x10, #40 +; CHECK-BE-NEXT: ldrh w11, [x0, #16] ; CHECK-BE-NEXT: lsr x13, x10, #45 -; CHECK-BE-NEXT: lsr x11, x9, #40 +; CHECK-BE-NEXT: lsr x15, x10, #40 +; CHECK-BE-NEXT: lsr x12, x9, #40 ; CHECK-BE-NEXT: ubfx x14, x9, #33, #7 ; CHECK-BE-NEXT: ubfx x16, x10, #26, #14 -; CHECK-BE-NEXT: bfi w16, w12, #14, #18 -; CHECK-BE-NEXT: ubfx x12, x9, #14, #18 -; CHECK-BE-NEXT: bfi w14, w11, #7, #24 -; CHECK-BE-NEXT: ldrb w11, [x0, #18] +; CHECK-BE-NEXT: orr w12, w14, w12, lsl #7 +; CHECK-BE-NEXT: ldrb w14, [x0, #18] +; CHECK-BE-NEXT: orr 
w15, w16, w15, lsl #14 ; CHECK-BE-NEXT: fmov s0, w13 ; CHECK-BE-NEXT: add x0, x0, #32 -; CHECK-BE-NEXT: fmov s1, w14 -; CHECK-BE-NEXT: bfi w11, w15, #8, #16 -; CHECK-BE-NEXT: mov v0.s[1], w16 +; CHECK-BE-NEXT: fmov s1, w12 +; CHECK-BE-NEXT: ubfx x12, x9, #14, #18 +; CHECK-BE-NEXT: orr w11, w14, w11, lsl #8 +; CHECK-BE-NEXT: mov v0.s[1], w15 ; CHECK-BE-NEXT: mov v1.s[1], w12 ; CHECK-BE-NEXT: extr x12, x10, x9, #40 ; CHECK-BE-NEXT: lsl x9, x9, #24 diff --git a/llvm/test/CodeGen/AArch64/urem-seteq.ll b/llvm/test/CodeGen/AArch64/urem-seteq.ll index 9f9e3f712a624..56b030dcca52a 100644 --- a/llvm/test/CodeGen/AArch64/urem-seteq.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq.ll @@ -82,8 +82,8 @@ define i16 @test_urem_even(i16 %X) nounwind { ; CHECK-NEXT: mul w8, w0, w8 ; CHECK-NEXT: and w9, w8, #0xfffc ; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: bfi w9, w8, #15, #17 -; CHECK-NEXT: ubfx w8, w9, #1, #15 +; CHECK-NEXT: orr w8, w9, w8, lsl #15 +; CHECK-NEXT: ubfx w8, w8, #1, #15 ; CHECK-NEXT: cmp w8, #2340 ; CHECK-NEXT: cset w0, hi ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll index eab5c8abd0204..9e73cc5195e4a 100644 --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -249,17 +249,18 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[2] ; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and v1.8b, v0.8b, v2.8b ; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: bfi w9, w10, #2, #1 +; CHECK-NEXT: and w8, w8, #0x1 +; CHECK-NEXT: bfi w8, w9, #1, #1 ; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: bfi w9, w11, #3, #29 -; CHECK-NEXT: and w8, w9, #0xf 
+; CHECK-NEXT: bfi w8, w10, #2, #1 +; CHECK-NEXT: orr w8, w8, w11, lsl #3 +; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll index 0fccb574644f1..4b61a873706ad 100644 --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -299,14 +299,15 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: and v1.8b, v2.8b, v1.8b -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w8, v1.h[0] +; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: umov w11, v1.h[3] -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: bfi w9, w11, #3, #29 -; CHECK-NEXT: and w8, w9, #0xf +; CHECK-NEXT: and w8, w8, #0x1 +; CHECK-NEXT: bfi w8, w9, #1, #1 +; CHECK-NEXT: bfi w8, w10, #2, #1 +; CHECK-NEXT: orr w8, w8, w11, lsl #3 +; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) From fa981b541365190ae646d2dce575706cd0626cf7 Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 19 Oct 2022 19:50:20 +0100 Subject: [PATCH 174/516] Set LLVM_ATOMIC_LIB variable for convenient linking against libatomic * Set LLVM_ATOMIC_LIB to keep track of when we need to link against libatomic. * Add detection of mold linker which is required for this. * Use --as-needed when linking against libatomic as a bonus. On some platforms, libatomic may be required only sometimes. 
Bug: https://bugs.gentoo.org/832675 Thanks-to: Arfrever Frehtes Taifersar Arahesis Tested-by: erhard_f@mailbox.org Differential Revision: https://reviews.llvm.org/D136280 --- llvm/cmake/modules/AddLLVM.cmake | 1 + llvm/cmake/modules/CheckAtomic.cmake | 13 +++++++++++++ llvm/lib/Support/CMakeLists.txt | 4 +--- llvm/tools/dsymutil/CMakeLists.txt | 4 +--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 30ac0040e5650..428a22422e288 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -219,6 +219,7 @@ if (NOT DEFINED LLVM_LINKER_DETECTED AND NOT WIN32) else() if("${stdout}" MATCHES "^mold") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") + set(LLVM_LINKER_IS_MOLD YES CACHE INTERNAL "") message(STATUS "Linker detection: mold") elseif("${stdout}" MATCHES "GNU gold") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") diff --git a/llvm/cmake/modules/CheckAtomic.cmake b/llvm/cmake/modules/CheckAtomic.cmake index 3c5ba72993a3a..f11cadf39ff6b 100644 --- a/llvm/cmake/modules/CheckAtomic.cmake +++ b/llvm/cmake/modules/CheckAtomic.cmake @@ -82,6 +82,19 @@ elseif(LLVM_COMPILER_IS_GCC_COMPATIBLE OR CMAKE_CXX_COMPILER_ID MATCHES "XL") endif() endif() +# Set variable LLVM_ATOMIC_LIB specifying flags for linking against libatomic. +if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB) + # Use options --push-state, --as-needed and --pop-state if linker is known to support them. + # Use single option -Wl of compiler driver to avoid incorrect re-ordering of options by CMake. + if(LLVM_LINKER_IS_GNULD OR LLVM_LINKER_IS_GOLD OR LLVM_LINKER_IS_LLD OR LLVM_LINKER_IS_MOLD) + set(LLVM_ATOMIC_LIB "-Wl,--push-state,--as-needed,-latomic,--pop-state") + else() + set(LLVM_ATOMIC_LIB "-latomic") + endif() +else() + set(LLVM_ATOMIC_LIB) +endif() + ## TODO: This define is only used for the legacy atomic operations in ## llvm's Atomic.h, which should be replaced. 
Other code simply ## assumes C++11 works. diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index bc19b5be21409..023d50793effa 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -59,9 +59,7 @@ elseif( CMAKE_HOST_UNIX ) if( LLVM_ENABLE_TERMINFO ) set(imported_libs ${imported_libs} Terminfo::terminfo) endif() - if( LLVM_ENABLE_THREADS AND (HAVE_LIBATOMIC OR HAVE_CXX_LIBATOMICS64) ) - set(system_libs ${system_libs} atomic) - endif() + set(system_libs ${system_libs} ${LLVM_ATOMIC_LIB}) set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB}) if( UNIX AND NOT (BEOS OR HAIKU) ) set(system_libs ${system_libs} m) diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt index a255c1c5daf51..38028cd3d80a3 100644 --- a/llvm/tools/dsymutil/CMakeLists.txt +++ b/llvm/tools/dsymutil/CMakeLists.txt @@ -40,6 +40,4 @@ if(APPLE) target_link_libraries(dsymutil PRIVATE "-framework CoreFoundation") endif(APPLE) -if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB) - target_link_libraries(dsymutil PRIVATE atomic) -endif() +target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB}) From f0b451c77f14947e3e7d314f048679fa2f5c6298 Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 19 Oct 2022 20:09:34 +0100 Subject: [PATCH 175/516] Link liblldCOFF against libatomic when necessary Also simplify code for liblldCommon using the new LLVM_ATOMIC_LIB variable. Depends on D136280. 
Bug: https://bugs.gentoo.org/832675 Thanks-to: Arfrever Frehtes Taifersar Arahesis Tested-by: erhard_f@mailbox.org Differential Revision: https://reviews.llvm.org/D136281 --- lld/COFF/CMakeLists.txt | 1 + lld/Common/CMakeLists.txt | 9 ++------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt index d289bd5910348..55aec26854c8d 100644 --- a/lld/COFF/CMakeLists.txt +++ b/lld/COFF/CMakeLists.txt @@ -44,6 +44,7 @@ add_lld_library(lldCOFF LINK_LIBS lldCommon ${LLVM_PTHREAD_LIB} + ${LLVM_ATOMIC_LIB} DEPENDS COFFOptionsTableGen diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 1ae7da1f5f7f0..9c23ed3952235 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -1,9 +1,3 @@ -set(LLD_SYSTEM_LIBS ${LLVM_PTHREAD_LIB}) - -if(NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) - list(APPEND LLD_SYSTEM_LIBS atomic) -endif() - find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc) find_first_existing_vc_file("${LLD_SOURCE_DIR}" lld_vc) @@ -54,7 +48,8 @@ add_lld_library(lldCommon Target LINK_LIBS - ${LLD_SYSTEM_LIBS} + ${LLVM_PTHREAD_LIB} + ${LLVM_ATOMIC_LIB} DEPENDS intrinsics_gen From 20132d8eaa68a6c53e152718beda1dc0f4c9ff6c Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 19 Oct 2022 20:12:10 +0100 Subject: [PATCH 176/516] Link libclangBasic against libatomic when necessary. This is necessary at least on PPC32. Depends on D136280. 
Bug: https://bugs.gentoo.org/874024 Thanks-to: Arfrever Frehtes Taifersar Arahesis Tested-by: erhard_f@mailbox.org Differential Revision: https://reviews.llvm.org/D136282 --- clang/CMakeLists.txt | 1 + clang/lib/Basic/CMakeLists.txt | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 2ca81e506338c..236e6fbaca280 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -63,6 +63,7 @@ if(CLANG_BUILT_STANDALONE) include(TableGen) include(HandleLLVMOptions) include(VersionFromVCS) + include(CheckAtomic) include(GetErrcMessages) include(LLVMDistributionSupport) diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 5d197f59ac4f7..f0f3839a7e2c3 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -111,3 +111,7 @@ add_clang_library(clangBasic omp_gen ) +target_link_libraries(clangBasic + PRIVATE + ${LLVM_ATOMIC_LIB} +) From db698c535fd5a131429e305e194a8524f15424ba Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Tue, 1 Nov 2022 14:21:23 -0700 Subject: [PATCH 177/516] [test-release] Build BOLT by default for x86/arm Make BOLT build by default in X86 and AArch64 Linux boxes. 
Reviewed By: thieta, xbolva00 Differential Revision: https://reviews.llvm.org/D137305 --- llvm/utils/release/test-release.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/utils/release/test-release.sh b/llvm/utils/release/test-release.sh index 9208161c7da1b..cb01ed5bd8843 100755 --- a/llvm/utils/release/test-release.sh +++ b/llvm/utils/release/test-release.sh @@ -12,6 +12,7 @@ #===------------------------------------------------------------------------===# System=`uname -s` +Machine=`uname -m` if [ "$System" = "FreeBSD" ]; then MAKE=gmake else @@ -35,7 +36,6 @@ do_libcxxabi="yes" do_libunwind="yes" do_test_suite="yes" do_openmp="yes" -do_bolt="no" do_lld="yes" do_lldb="yes" do_polly="yes" @@ -47,6 +47,15 @@ ExtraConfigureFlags="" ExportBranch="" git_ref="" +do_bolt="no" +if [ "$System" = "Linux" ]; then + case $Machine in + x86_64 | arm64 | aarch64 ) + do_bolt="yes" + ;; + esac +fi + function usage() { echo "usage: `basename $0` -release X.Y.Z -rc NUM [OPTIONS]" echo "" From 24f9293de8794963bd29c731745a71ef6a1aab9d Mon Sep 17 00:00:00 2001 From: Mahesh Ravishankar Date: Thu, 3 Nov 2022 20:30:12 +0000 Subject: [PATCH 178/516] [mlir][Tensor] Allow builders of `tensor.empty` to accept encoding attribute. The `RankedTensorType` can have an optional encoding attribute. Allowing the builders of `tensor.empty` to accept the encoding attribute (optionally), allows building empty tensors with the type having the encoding attribute. 
Reviewed By: nicolasvasilache, hanchung, springerm Differential Revision: https://reviews.llvm.org/D137297 --- mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td | 9 ++++++--- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 14 ++++++++------ mlir/test/Dialect/Tensor/ops.mlir | 9 +++++++++ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 2cfdc6d8c6feb..552d2db97435c 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -191,14 +191,17 @@ def Tensor_EmptyOp : Tensor_Op<"empty", let builders = [ // Build with fully static sizes. - OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType)>, + OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType, + CArg<"Attribute", "{}">:$encoding)>, // Build with mixed static/dynamic sizes. OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType, - "ValueRange":$dynamicSizes)>, + "ValueRange":$dynamicSizes, + CArg<"Attribute", "{}">:$encoding)>, // Build with mixed static/dynamic sizes. 
- OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType)> + OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType, + CArg<"Attribute", "{}">:$encoding)> ]; let hasCanonicalizer = 1; diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 445e78e295fd1..31d892ffb6e41 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -497,27 +497,29 @@ void DimOp::getCanonicalizationPatterns(RewritePatternSet &results, //===----------------------------------------------------------------------===// void EmptyOp::build(OpBuilder &builder, OperationState &result, - ArrayRef staticShape, Type elementType) { + ArrayRef staticShape, Type elementType, + Attribute encoding) { assert(all_of(staticShape, [](int64_t sz) { return !ShapedType::isDynamic(sz); }) && "expected only static sizes"); - build(builder, result, staticShape, elementType, {}); + build(builder, result, staticShape, elementType, ValueRange{}, encoding); } void EmptyOp::build(OpBuilder &builder, OperationState &result, ArrayRef staticShape, Type elementType, - ValueRange dynamicSizes) { - auto tensorType = RankedTensorType::get(staticShape, elementType); + ValueRange dynamicSizes, Attribute encoding) { + auto tensorType = RankedTensorType::get(staticShape, elementType, encoding); build(builder, result, tensorType, dynamicSizes); } void EmptyOp::build(OpBuilder &builder, OperationState &result, - ArrayRef sizes, Type elementType) { + ArrayRef sizes, Type elementType, + Attribute encoding) { SmallVector staticShape; SmallVector dynamicSizes; dispatchIndexOpFoldResults(sizes, dynamicSizes, staticShape, ShapedType::kDynamicSize); - build(builder, result, staticShape, elementType, dynamicSizes); + build(builder, result, staticShape, elementType, dynamicSizes, encoding); } LogicalResult EmptyOp::verify() { diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir index aadf6ab90250d..4afe128d05eb3 100644 --- 
a/mlir/test/Dialect/Tensor/ops.mlir +++ b/mlir/test/Dialect/Tensor/ops.mlir @@ -21,6 +21,15 @@ func.func @empty(%sz: index) -> tensor<5x?x6xf32> { return %0 : tensor<5x?x6xf32> } +// CHECK-LABEL: func @empty_with_encoding( +// CHECK-SAME: %[[sz:.*]]: index +func.func @empty_with_encoding(%sz: index) -> tensor<5x?x6xf32, "foo"> { + // CHECK: tensor.empty(%[[sz]]) : tensor<5x?x6xf32, "foo"> + %0 = tensor.empty(%sz) : tensor<5x?x6xf32, "foo"> + return %0 : tensor<5x?x6xf32, "foo"> +} + + // CHECK-LABEL: func @extract( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[INDEX:.*]]: index) { From 38f34e587d10fcd7d18fd240e41248006faa639e Mon Sep 17 00:00:00 2001 From: Mahesh Ravishankar Date: Thu, 3 Nov 2022 20:38:34 +0000 Subject: [PATCH 179/516] [mlir][Arith] Fix folder of CmpIOp to not fail when element type is not integer. The folder used `cast` which would segfault if the type were a vector type. Handle this case appropriately and avoid failure. Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D137345 --- mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 19 +++++++++++++++-- mlir/test/Dialect/Arith/canonicalize.mlir | 26 +++++++++++++++++++++-- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index d1d03a549092d..2c0fc51d08a40 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace mlir::arith; @@ -1444,6 +1445,16 @@ static Attribute getBoolAttribute(Type type, MLIRContext *ctx, bool value) { return DenseElementsAttr::get(shapedType, boolAttr); } +static Optional getIntegerWidth(Type t) { + if (auto intType = t.dyn_cast()) { + return intType.getWidth(); + } + if (auto vectorIntType = t.dyn_cast()) { + return vectorIntType.getElementType().cast().getWidth(); + } + 
return llvm::None; +} + OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "cmpi takes two operands"); @@ -1456,13 +1467,17 @@ OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_Zero())) { if (auto extOp = getLhs().getDefiningOp()) { // extsi(%x : i1 -> iN) != 0 -> %x - if (extOp.getOperand().getType().cast().getWidth() == 1 && + Optional integerWidth = + getIntegerWidth(extOp.getOperand().getType()); + if (integerWidth && integerWidth.value() == 1 && getPredicate() == arith::CmpIPredicate::ne) return extOp.getOperand(); } if (auto extOp = getLhs().getDefiningOp()) { // extui(%x : i1 -> iN) != 0 -> %x - if (extOp.getOperand().getType().cast().getWidth() == 1 && + Optional integerWidth = + getIntegerWidth(extOp.getOperand().getType()); + if (integerWidth && integerWidth.value() == 1 && getPredicate() == arith::CmpIPredicate::ne) return extOp.getOperand(); } diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 337eec00f3bf9..336324ef4eec9 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -162,7 +162,7 @@ func.func @cmpi_const_right(%arg0: i64) // ----- -// CHECK-LABEL: @cmpOfExtSI +// CHECK-LABEL: @cmpOfExtSI( // CHECK-NEXT: return %arg0 func.func @cmpOfExtSI(%arg0: i1) -> i1 { %ext = arith.extsi %arg0 : i1 to i64 @@ -171,7 +171,7 @@ func.func @cmpOfExtSI(%arg0: i1) -> i1 { return %res : i1 } -// CHECK-LABEL: @cmpOfExtUI +// CHECK-LABEL: @cmpOfExtUI( // CHECK-NEXT: return %arg0 func.func @cmpOfExtUI(%arg0: i1) -> i1 { %ext = arith.extui %arg0 : i1 to i64 @@ -182,6 +182,26 @@ func.func @cmpOfExtUI(%arg0: i1) -> i1 { // ----- +// CHECK-LABEL: @cmpOfExtSIVector( +// CHECK-NEXT: return %arg0 +func.func @cmpOfExtSIVector(%arg0: vector<4xi1>) -> vector<4xi1> { + %ext = arith.extsi %arg0 : vector<4xi1> to vector<4xi64> + %c0 = arith.constant dense<0> : vector<4xi64> + %res = arith.cmpi ne, %ext, %c0 : 
vector<4xi64> + return %res : vector<4xi1> +} + +// CHECK-LABEL: @cmpOfExtUIVector( +// CHECK-NEXT: return %arg0 +func.func @cmpOfExtUIVector(%arg0: vector<4xi1>) -> vector<4xi1> { + %ext = arith.extui %arg0 : vector<4xi1> to vector<4xi64> + %c0 = arith.constant dense<0> : vector<4xi64> + %res = arith.cmpi ne, %ext, %c0 : vector<4xi64> + return %res : vector<4xi1> +} + +// ----- + // CHECK-LABEL: @extSIOfExtUI // CHECK: %[[res:.+]] = arith.extui %arg0 : i1 to i64 // CHECK: return %[[res]] @@ -1660,3 +1680,5 @@ func.func @xorxor3(%a : i32, %b : i32) -> i32 { %res = arith.xori %b, %c : i32 return %res : i32 } + +// ----- From 2497d5aa7716a664c4f73df1980b026c906c7522 Mon Sep 17 00:00:00 2001 From: Manoj Gupta Date: Tue, 25 Oct 2022 12:29:03 -0700 Subject: [PATCH 180/516] Define _GNU_SOURCE for arm baremetal in C++ mode. This matches other C++ drivers e.g. Linux that define _GNU_SOURCE. This lets clang compile more code by default without explicitly passing _GNU_SOURCE on the command line. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D136712 --- clang/lib/Basic/Targets/ARM.cpp | 5 ++++- clang/test/Preprocessor/init-arm.c | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index f2db186aac4cb..c38849058e13d 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -705,8 +705,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // For bare-metal none-eabi. if (getTriple().getOS() == llvm::Triple::UnknownOS && (getTriple().getEnvironment() == llvm::Triple::EABI || - getTriple().getEnvironment() == llvm::Triple::EABIHF)) + getTriple().getEnvironment() == llvm::Triple::EABIHF)) { Builder.defineMacro("__ELF__"); + if (Opts.CPlusPlus) + Builder.defineMacro("_GNU_SOURCE"); + } // Target properties. 
Builder.defineMacro("__REGISTER_PREFIX__", ""); diff --git a/clang/test/Preprocessor/init-arm.c b/clang/test/Preprocessor/init-arm.c index e317ffa67393d..a55d0d63a79b3 100644 --- a/clang/test/Preprocessor/init-arm.c +++ b/clang/test/Preprocessor/init-arm.c @@ -1450,3 +1450,8 @@ // THUMB-MINGW:#define __ARM_DWARF_EH__ 1 +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=thumbv6m-none-unknown-eabi < /dev/null | FileCheck -match-full-lines -check-prefix Thumbv6m-elf %s +// Thumbv6m-elf: #define __ELF__ 1 + +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=thumbv6m-none-unknown-eabi < /dev/null | FileCheck -match-full-lines -check-prefix Thumbv6m-cxx %s +// Thumbv6m-cxx: #define _GNU_SOURCE 1 From 247dedea4e031d3bff3a4fcf08cd259a13af279d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 3 Nov 2022 13:59:46 -0700 Subject: [PATCH 181/516] DebugInfo: Move missing-abstract-variable.ll test from Generic to X86 This test is XFAIL'd on most arches, and seems too fragile to be run on non-X86. Reviewed By: jmorse, probinson Differential Revision: https://reviews.llvm.org/D109806 --- .../{Generic => X86}/missing-abstract-variable.ll | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) rename llvm/test/DebugInfo/{Generic => X86}/missing-abstract-variable.ll (94%) diff --git a/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll similarity index 94% rename from llvm/test/DebugInfo/Generic/missing-abstract-variable.ll rename to llvm/test/DebugInfo/X86/missing-abstract-variable.ll index 80e278e51194b..bf63cb41428a2 100644 --- a/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll +++ b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll @@ -1,10 +1,4 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck %s - -; The formal parameter 'b' for Function 'x' when inlined within 'a' is lost on -; powerpc64 (and on x86_64 at at least -O2). 
Presumably this is a SelectionDAG -; issue. -; FIXME: arm64 is an alias for aarch64 on macs, apparently? -; XFAIL: powerpc64, aarch64, arm64, hexagon, riscv, sparc, loongarch +; RUN: %llc_dwarf -mtriple x86_64-gnu-linux -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck %s ; Build from the following source with clang -O2. From 3384f05a2cdb96a2f106c234ae8a9d0e306717a4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 26 Oct 2022 21:07:12 +0000 Subject: [PATCH 182/516] [llvm-objdump][Offload] Use common offload extraction method A previous patch introduced a common function used to extract offloading binaries from an image. Therefore we no longer need to duplicate the functionality in the `llvm-objdump` implementation. Functionally, this removes the old warning behaviour when given malformed input. This has been changed to a hard error, which is effectively the same. This required a slight tweak in the linker wrapper to filter out the user passing shared objects directly. 
Reviewed By: tra Differential Revision: https://reviews.llvm.org/D136796 --- .../ClangLinkerWrapper.cpp | 4 ++ llvm/lib/Object/OffloadBinary.cpp | 8 ++- .../Offloading/content-failure.test | 2 +- .../Offloading/{binary.test => elf.test} | 14 ++-- .../llvm-objdump/Offloading/warning.test | 21 ------ llvm/tools/llvm-objdump/OffloadDump.cpp | 65 +++++-------------- 6 files changed, 38 insertions(+), 76 deletions(-) rename llvm/test/tools/llvm-objdump/Offloading/{binary.test => elf.test} (67%) delete mode 100644 llvm/test/tools/llvm-objdump/Offloading/warning.test diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 3ad22be755f3c..6a12b64f7d7dd 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1259,6 +1259,10 @@ Expected> getDeviceInput(const ArgList &Args) { if (std::error_code EC = BufferOrErr.getError()) return createFileError(Filename, EC); + if (identify_magic((*BufferOrErr)->getBuffer()) == + file_magic::elf_shared_object) + continue; + bool IsLazy = identify_magic((*BufferOrErr)->getBuffer()) == file_magic::archive; if (Error Err = extractOffloadBinaries( diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp index 8f62d692d050a..3f7a60d89c27d 100644 --- a/llvm/lib/Object/OffloadBinary.cpp +++ b/llvm/lib/Object/OffloadBinary.cpp @@ -41,6 +41,10 @@ Error extractOffloadFiles(MemoryBufferRef Contents, std::unique_ptr Buffer = MemoryBuffer::getMemBuffer(Contents.getBuffer().drop_front(Offset), "", /*RequiresNullTerminator*/ false); + if (!isAddrAligned(Align(OffloadBinary::getAlignment()), + Buffer->getBufferStart())) + Buffer = MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(), + Buffer->getBufferIdentifier()); auto BinaryOrErr = OffloadBinary::create(*Buffer); if (!BinaryOrErr) return BinaryOrErr.takeError(); @@ -254,7 +258,9 @@ Error 
object::extractOffloadBinaries(MemoryBufferRef Buffer, switch (Type) { case file_magic::bitcode: return extractFromBitcode(Buffer, Binaries); - case file_magic::elf_relocatable: { + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: { Expected> ObjFile = ObjectFile::createObjectFile(Buffer, Type); if (!ObjFile) diff --git a/llvm/test/tools/llvm-objdump/Offloading/content-failure.test b/llvm/test/tools/llvm-objdump/Offloading/content-failure.test index 5089edae04502..40ff6785f2d38 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/content-failure.test +++ b/llvm/test/tools/llvm-objdump/Offloading/content-failure.test @@ -15,4 +15,4 @@ Sections: ShOffset: 0x99999 AddressAlign: 0x0000000000000008 -# CHECK: error: '[[FILENAME]]': The end of the file was unexpectedly encountered +# CHECK: error: '[[FILENAME]]': while extracting offloading files: The end of the file was unexpectedly encountered diff --git a/llvm/test/tools/llvm-objdump/Offloading/binary.test b/llvm/test/tools/llvm-objdump/Offloading/elf.test similarity index 67% rename from llvm/test/tools/llvm-objdump/Offloading/binary.test rename to llvm/test/tools/llvm-objdump/Offloading/elf.test index 880bab2ec5337..10182aeb856cd 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/binary.test +++ b/llvm/test/tools/llvm-objdump/Offloading/elf.test @@ -3,15 +3,21 @@ # RUN: llvm-objdump --offloading %t.bin | FileCheck %s --match-full-lines --strict-whitespace --implicit-check-not={{.}} ## Check that we can dump an offloading binary inside of an ELF section. 
-# RUN: yaml2obj %s -o %t.elf -# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t.elf -# RUN: llvm-objdump --offloading %t.elf | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} +# RUN: yaml2obj %s -o %t -DTYPE=ET_EXEC +# RUN: yaml2obj %s -o %t.so -DTYPE=ET_DYN +# RUN: yaml2obj %s -o %t.o -DTYPE=ET_REL +# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t +# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t.so +# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t.o +# RUN: llvm-objdump --offloading %t | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} +# RUN: llvm-objdump --offloading %t.so | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} +# RUN: llvm-objdump --offloading %t.o | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} !ELF FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB - Type: ET_EXEC + Type: [[TYPE]] Sections: - Name: .llvm.offloading Type: SHT_LLVM_OFFLOADING diff --git a/llvm/test/tools/llvm-objdump/Offloading/warning.test b/llvm/test/tools/llvm-objdump/Offloading/warning.test deleted file mode 100644 index a4be54ebf6dc3..0000000000000 --- a/llvm/test/tools/llvm-objdump/Offloading/warning.test +++ /dev/null @@ -1,21 +0,0 @@ -## Ensure we give a warning on bad input following good input. 
-# RUN: yaml2obj %S/Inputs/binary.yaml -o %t-good.bin -# RUN: yaml2obj %S/Inputs/malformed.yaml -o %t-bad.bin -# RUN: cat %t-bad.bin >> %t-good.bin -# RUN: yaml2obj %s -o %t.elf -# RUN: llvm-objcopy --update-section .llvm.offloading=%t-good.bin %t.elf -# RUN: llvm-objdump --offloading %t.elf 2>&1 | FileCheck %s -DFILENAME=%t.elf - -!ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .llvm.offloading - Type: SHT_LLVM_OFFLOADING - Flags: [ SHF_EXCLUDE ] - AddressAlign: 0x0000000000000008 - -# CHECK: OFFLOADING IMAGE [0]: -# CHECK: warning: '[[FILENAME]]': while parsing offloading files: The end of the file was unexpectedly encountered diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp index 46334c249070d..fdef373279b39 100644 --- a/llvm/tools/llvm-objdump/OffloadDump.cpp +++ b/llvm/tools/llvm-objdump/OffloadDump.cpp @@ -10,6 +10,7 @@ /// This file implements the offloading-specific dumper for llvm-objdump. /// //===----------------------------------------------------------------------===// + #include "OffloadDump.h" #include "llvm-objdump.h" #include "llvm/Object/ELFObjectFile.h" @@ -46,24 +47,6 @@ static void printBinary(const OffloadBinary &OB, uint64_t Index) { << getOffloadKindName(OB.getOffloadKind()) << "\n"; } -static Error visitAllBinaries(const OffloadBinary &OB) { - uint64_t Offset = 0; - uint64_t Index = 0; - while (Offset < OB.getMemoryBufferRef().getBufferSize()) { - MemoryBufferRef Buffer = - MemoryBufferRef(OB.getData().drop_front(Offset), OB.getFileName()); - auto BinaryOrErr = OffloadBinary::create(Buffer); - if (!BinaryOrErr) - return BinaryOrErr.takeError(); - - OffloadBinary &Binary = **BinaryOrErr; - printBinary(Binary, Index++); - - Offset += Binary.getSize(); - } - return Error::success(); -} - /// Print the embedded offloading contents of an ObjectFile \p O. 
void llvm::dumpOffloadBinary(const ObjectFile &O) { if (!O.isELF()) { @@ -72,41 +55,25 @@ void llvm::dumpOffloadBinary(const ObjectFile &O) { return; } - for (ELFSectionRef Sec : O.sections()) { - if (Sec.getType() != ELF::SHT_LLVM_OFFLOADING) - continue; - - Expected Contents = Sec.getContents(); - if (!Contents) - reportError(Contents.takeError(), O.getFileName()); - - std::unique_ptr Buffer = - MemoryBuffer::getMemBuffer(*Contents, O.getFileName(), false); - if (!isAddrAligned(Align(OffloadBinary::getAlignment()), - Buffer->getBufferStart())) - Buffer = MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(), - Buffer->getBufferIdentifier()); - auto BinaryOrErr = OffloadBinary::create(*Buffer); - if (!BinaryOrErr) - reportError(O.getFileName(), "while extracting offloading files: " + - toString(BinaryOrErr.takeError())); - OffloadBinary &Binary = **BinaryOrErr; + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(O.getMemoryBufferRef(), Binaries)) + reportError(O.getFileName(), "while extracting offloading files: " + + toString(std::move(Err))); - // Print out all the binaries that are contained in this buffer. If we fail - // to parse a binary before reaching the end of the buffer emit a warning. - if (Error Err = visitAllBinaries(Binary)) - reportWarning("while parsing offloading files: " + - toString(std::move(Err)), - O.getFileName()); - } + // Print out all the binaries that are contained in this buffer. + for (uint64_t I = 0, E = Binaries.size(); I != E; ++I) + printBinary(*Binaries[I].getBinary(), I); } /// Print the contents of an offload binary file \p OB. This may contain /// multiple binaries stored in the same buffer. void llvm::dumpOffloadSections(const OffloadBinary &OB) { - // Print out all the binaries that are contained at this buffer. If we fail to - // parse a binary before reaching the end of the buffer emit a warning. 
- if (Error Err = visitAllBinaries(OB)) - reportWarning("while parsing offloading files: " + toString(std::move(Err)), - OB.getFileName()); + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(OB.getMemoryBufferRef(), Binaries)) + reportError(OB.getFileName(), "while extracting offloading files: " + + toString(std::move(Err))); + + // Print out all the binaries that are contained in this buffer. + for (uint64_t I = 0, E = Binaries.size(); I != E; ++I) + printBinary(*Binaries[I].getBinary(), I); } From 8298f0b7b92e5e2ff8d547cd93863f094d13cb06 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 27 Oct 2022 16:13:22 +0000 Subject: [PATCH 183/516] [Binary] Support extracting offloading files from COFF This patch adds initial support for extracting offloading binaries from `COFF` objects. This is a first step to allow building offloading files on Windows targets with the new driver. Depends on D136796 Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D136855 --- llvm/lib/Object/OffloadBinary.cpp | 26 +++++++++--- .../tools/llvm-objdump/Offloading/coff.test | 42 +++++++++++++++++++ .../llvm-objdump/Offloading/non-elf.test | 20 ++++----- llvm/tools/llvm-objdump/OffloadDump.cpp | 7 ++-- 4 files changed, 77 insertions(+), 18 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/Offloading/coff.test diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp index 3f7a60d89c27d..d8cdcdc21d39c 100644 --- a/llvm/lib/Object/OffloadBinary.cpp +++ b/llvm/lib/Object/OffloadBinary.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" @@ -66,12 +67,26 @@ Error extractOffloadFiles(MemoryBufferRef Contents, } // Extract offloading binaries from an Object file \p Obj. 
-Error extractFromBinary(const ObjectFile &Obj, +Error extractFromObject(const ObjectFile &Obj, SmallVectorImpl &Binaries) { - for (ELFSectionRef Sec : Obj.sections()) { - if (Sec.getType() != ELF::SHT_LLVM_OFFLOADING) + assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type"); + + for (SectionRef Sec : Obj.sections()) { + // ELF files contain a section with the LLVM_OFFLOADING type. + if (Obj.isELF() && + static_cast(Sec).getType() != ELF::SHT_LLVM_OFFLOADING) continue; + // COFF has no section types so we rely on the name of the section. + if (Obj.isCOFF()) { + Expected NameOrErr = Sec.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + + if (!NameOrErr->equals(".llvm.offloading")) + continue; + } + Expected Buffer = Sec.getContents(); if (!Buffer) return Buffer.takeError(); @@ -260,12 +275,13 @@ Error object::extractOffloadBinaries(MemoryBufferRef Buffer, return extractFromBitcode(Buffer, Binaries); case file_magic::elf_relocatable: case file_magic::elf_executable: - case file_magic::elf_shared_object: { + case file_magic::elf_shared_object: + case file_magic::coff_object: { Expected> ObjFile = ObjectFile::createObjectFile(Buffer, Type); if (!ObjFile) return ObjFile.takeError(); - return extractFromBinary(*ObjFile->get(), Binaries); + return extractFromObject(*ObjFile->get(), Binaries); } case file_magic::archive: { Expected> LibFile = diff --git a/llvm/test/tools/llvm-objdump/Offloading/coff.test b/llvm/test/tools/llvm-objdump/Offloading/coff.test new file mode 100644 index 0000000000000..022277d137bd4 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/Offloading/coff.test @@ -0,0 +1,42 @@ +# RUN: yaml2obj %S/Inputs/binary.yaml -o %t.bin +# RUN: yaml2obj %s -o %t.coff +# RUN: llvm-objcopy --add-section .llvm.offloading=%t.bin %t.coff +# RUN: llvm-objdump --offloading %t.coff | FileCheck %s --match-full-lines --strict-whitespace --implicit-check-not={{.}} + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - 
Name: .rdata + Characteristics: [] + - Name: .llvm.offloading + Characteristics: [ IMAGE_SCN_LNK_REMOVE, IMAGE_SCN_MEM_DISCARDABLE ] + Alignment: 8 +symbols: + +# CHECK:{{.*}}file format coff-x86-64 +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [0]: +# CHECK-NEXT:kind llvm ir +# CHECK-NEXT:arch gfx908 +# CHECK-NEXT:triple amdgcn-amd-amdhsa +# CHECK-NEXT:producer openmp +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [1]: +# CHECK-NEXT:kind llvm ir +# CHECK-NEXT:arch gfx90a +# CHECK-NEXT:triple amdgcn-amd-amdhsa +# CHECK-NEXT:producer openmp +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [2]: +# CHECK-NEXT:kind cubin +# CHECK-NEXT:arch sm_52 +# CHECK-NEXT:triple nvptx64-nvidia-cuda +# CHECK-NEXT:producer openmp +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [3]: +# CHECK-NEXT:kind +# CHECK-NEXT:arch sm_70 +# CHECK-NEXT:triple nvptx64-nvidia-cuda +# CHECK-NEXT:producer none diff --git a/llvm/test/tools/llvm-objdump/Offloading/non-elf.test b/llvm/test/tools/llvm-objdump/Offloading/non-elf.test index 955556f0567b5..a2eb377bb35d1 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/non-elf.test +++ b/llvm/test/tools/llvm-objdump/Offloading/non-elf.test @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump --offloading %t 2>&1 | FileCheck -DFILENAME=%t %s ---- !COFF -header: - Machine: IMAGE_FILE_MACHINE_AMD64 - Characteristics: [] -sections: - - Name: .rdata - Characteristics: [] - SectionData: 00 -symbols: +--- !mach-o +FileHeader: + magic: 0xFEEDFACE + cputype: 0x00000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 0 + sizeofcmds: 0 + flags: 0x00002000 -# CHECK: warning: '[[FILENAME]]': --offloading is currently only supported for ELF targets +# CHECK: warning: '[[FILENAME]]': --offloading is currently only supported for COFF and ELF targets diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp index fdef373279b39..4ac6b99e79bbb 100644 --- a/llvm/tools/llvm-objdump/OffloadDump.cpp +++ 
b/llvm/tools/llvm-objdump/OffloadDump.cpp @@ -49,9 +49,10 @@ static void printBinary(const OffloadBinary &OB, uint64_t Index) { /// Print the embedded offloading contents of an ObjectFile \p O. void llvm::dumpOffloadBinary(const ObjectFile &O) { - if (!O.isELF()) { - reportWarning("--offloading is currently only supported for ELF targets", - O.getFileName()); + if (!O.isELF() && !O.isCOFF()) { + reportWarning( + "--offloading is currently only supported for COFF and ELF targets", + O.getFileName()); return; } From 76c0ee5cb74017546dae5194d1948201be41b1b9 Mon Sep 17 00:00:00 2001 From: varconst Date: Thu, 3 Nov 2022 14:25:40 -0700 Subject: [PATCH 184/516] [libcxx] Fix the support requirements for `std::function` Objective-C++ test. The test requires two features to be supported but only checked for one of them. Differential Revision: https://reviews.llvm.org/D137294 --- .../libcxx/utilities/function.objects/func.blocks.arc.pass.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm b/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm index 186fe22e6e476..c9ace62000be3 100644 --- a/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm +++ b/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm @@ -12,7 +12,7 @@ // This test requires the Blocks runtime, which is (only?) available on Darwin // out-of-the-box. -// REQUIRES: has-fblocks && darwin +// REQUIRES: has-fblocks && has-fobjc-arc && darwin // ADDITIONAL_COMPILE_FLAGS: -fblocks -fobjc-arc From 3be864333a8843534465bcbf3d355fcd12b42369 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 7 Oct 2022 16:12:52 -0700 Subject: [PATCH 185/516] [lldb/crashlog] Add support for 32bit frame addresses This patch adds support for 32bit stack frame addresses in the `crashlog` command. 
For crash reports that are generated from an arm64_32 process, `PAGEZERO` is loaded at 0x00004000 so no code address will be less than 0x4000. This patch changes the crashlog frame address regex group to match addresses as small as 4 hex characters. rdar://100805026 Differential Revision: https://reviews.llvm.org/D135310 Signed-off-by: Med Ismail Bennani --- lldb/examples/python/crashlog.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index e80ecd91579ad..77d54ce303868 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -613,10 +613,10 @@ class TextCrashLogParser(CrashLogParser): frame_regex = re.compile(r'^(\d+)\s+' # id r'(.+?)\s+' # img_name r'(?:' +version+ r'\s+)?' # img_version - r'(0x[0-9a-fA-F]{7,})' # addr (7 chars or more) + r'(0x[0-9a-fA-F]{4,})' # addr (4 chars or more) r' +(.*)' # offs ) - null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{7,} +') + null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +') image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo r'\s+-\s+' # - r'(0x[0-9a-fA-F]+)\s+' # img_hi From 42df155ae628b4ae756a858bd09b105ee10b86eb Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 7 Oct 2022 16:08:52 -0700 Subject: [PATCH 186/516] [lldb/crashlog] Fix the image_regex_uuid to skip null UUID images This patch updates the image_regex_uuid matcher to match null-UUID images in the plain text crashlog parser. It updates the regex to match one or more '?' characters or the image full path. 
rdar://100904019 Differential Revision: https://reviews.llvm.org/D135482 Signed-off-by: Med Ismail Bennani --- lldb/examples/python/crashlog.py | 2 +- .../Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 77d54ce303868..8aaf7c165684a 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -623,7 +623,7 @@ class TextCrashLogParser(CrashLogParser): r'[+]?(.+?)\s+' # img_name r'(?:(' +version+ r')\s+)?' # img_version r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid - r'(/.*)' # img_path + r'(\?+|/.*)' # img_path ) exception_type_regex = re.compile(r'^Exception Type:\s+(EXC_[A-Z_]+)(?:\s+\((.*)\))?') exception_codes_regex = re.compile(r'^Exception Codes:\s+(0x[0-9a-fA-F]+),\s*(0x[0-9a-fA-F]+)') diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash index c02150c7f15a9..16a95586a13b4 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash @@ -47,3 +47,4 @@ Trap Number: 14 Binary Images: 0x100000000 - 0x200000000 +a.out (0) <@UUID@> @EXEC@ + 0x0 - 0xffffffffffffffff ??? (*) <00000000-0000-0000-0000-000000000000> ??? 
From cc05487a834e55659072918393f5c7490af67ed2 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 7 Oct 2022 16:11:49 -0700 Subject: [PATCH 187/516] [lldb/Plugins] Cleanup error handling in Scripted{Process,Thread} (NFC) Signed-off-by: Med Ismail Bennani --- .../Plugins/Process/scripted/ScriptedProcess.cpp | 4 ++-- .../Plugins/Process/scripted/ScriptedThread.cpp | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index 11692cbb69d48..174c00e985595 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -411,7 +411,7 @@ ScriptedProcess::GetLoadedDynamicLibrariesInfos() { StructuredData::ArraySP loaded_images_sp = GetInterface().GetLoadedImages(); if (!loaded_images_sp || !loaded_images_sp->GetSize()) - return GetInterface().ErrorWithMessage( + return ScriptedInterface::ErrorWithMessage( LLVM_PRETTY_FUNCTION, "No loaded images.", error); ModuleList module_list; @@ -477,7 +477,7 @@ ScriptedProcess::GetLoadedDynamicLibrariesInfos() { }; if (!loaded_images_sp->ForEach(reload_image)) - return GetInterface().ErrorWithMessage( + return ScriptedInterface::ErrorWithMessage( LLVM_PRETTY_FUNCTION, "Couldn't reload all images.", error); target.ModulesDidLoad(module_list); diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp index c655ec12ecda3..b19331b5b1082 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -171,9 +171,7 @@ bool ScriptedThread::LoadArtificialStackFrames() { StackFrameListSP frames = GetStackFrameList(); for (size_t idx = 0; idx < arr_size; idx++) { - StructuredData::Dictionary *dict; - if (!arr_sp->GetItemAtIndexAsDictionary(idx, dict) || !dict) return 
ScriptedInterface::ErrorWithMessage( LLVM_PRETTY_FUNCTION, @@ -334,11 +332,10 @@ std::shared_ptr ScriptedThread::GetDynamicRegisterInfo() { Status error; if (!reg_info) - return GetInterface() - ->ErrorWithMessage>( - LLVM_PRETTY_FUNCTION, - "Failed to get scripted thread registers info.", error, - LLDBLog::Thread); + return ScriptedInterface::ErrorWithMessage< + std::shared_ptr>( + LLVM_PRETTY_FUNCTION, "Failed to get scripted thread registers info.", + error, LLDBLog::Thread); m_register_info_sp = std::make_shared( *reg_info, m_scripted_process.GetTarget().GetArchitecture()); From 268628cb79b0f8bb0edec86d9d500c16eadd516a Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 7 Oct 2022 18:20:17 -0700 Subject: [PATCH 188/516] [lldb/Commands] Add newline for extended backtrace thread (NFCI) This adds a new line between the real thread and the extended backtrace thread when it's available. This should improve readability for the user. Signed-off-by: Med Ismail Bennani --- lldb/source/Commands/CommandObjectThread.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index bfe85043f3703..5e817635bbe6b 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -189,6 +189,7 @@ class CommandObjectThreadBacktrace : public CommandObjectIterateOverThreads { if (ext_thread_sp && ext_thread_sp->IsValid()) { const uint32_t num_frames_with_source = 0; const bool stop_format = false; + strm.PutChar('\n'); if (ext_thread_sp->GetStatus(strm, m_options.m_start, m_options.m_count, num_frames_with_source, stop_format)) { From 90608963d40b4765fc95e039d5100940ad822535 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 10 Oct 2022 13:45:45 -0700 Subject: [PATCH 189/516] [lldb/Utility] Fix StructuredData::ParseJSONValue for null items This patch fixes the JSON parser for StructuredData to handle JSON null entries. 
Differential Revision: https://reviews.llvm.org/D135616 Signed-off-by: Med Ismail Bennani --- lldb/source/Utility/StructuredData.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp index fc10fa539e9e5..2e023344f3ddb 100644 --- a/lldb/source/Utility/StructuredData.cpp +++ b/lldb/source/Utility/StructuredData.cpp @@ -69,6 +69,9 @@ static StructuredData::ObjectSP ParseJSONValue(json::Value &value) { if (auto d = value.getAsNumber()) return std::make_shared(*d); + if (auto n = value.getAsNull()) + return std::make_shared(); + return StructuredData::ObjectSP(); } From e861d053dd43f2e5a63f150ee2f9d1d643ea29c1 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 3 Nov 2022 11:25:40 -0700 Subject: [PATCH 190/516] [lldb/Utility] Add GetDescription(Stream&) to StructureData::* This patch improves the StructuredData classes to provide a GetDescription(lldb_private::Stream&) affordance. This is very convenient compared to the Dump method because it tries to pretty-print the structure instead of just serializing it into JSON. This patch also updates some parts of lldb (i.e. extended crash info) to use this new affordance instead of StructuredData::Dump. 
Differential Revision: https://reviews.llvm.org/D135547 Signed-off-by: Med Ismail Bennani --- lldb/include/lldb/Core/StructuredDataImpl.h | 2 +- lldb/include/lldb/Utility/StructuredData.h | 23 ++++ lldb/source/Commands/CommandObjectProcess.cpp | 3 +- lldb/source/Utility/StructuredData.cpp | 100 ++++++++++++++++++ .../TestProcessCrashInfo.py | 4 +- lldb/unittests/Utility/CMakeLists.txt | 6 +- .../Utility/Inputs/StructuredData-full.json | 15 +++ .../Utility/Inputs/StructuredData-nested.json | 14 +++ lldb/unittests/Utility/StructuredDataTest.cpp | 67 ++++++++++++ 9 files changed, 230 insertions(+), 4 deletions(-) create mode 100644 lldb/unittests/Utility/Inputs/StructuredData-full.json create mode 100644 lldb/unittests/Utility/Inputs/StructuredData-nested.json diff --git a/lldb/include/lldb/Core/StructuredDataImpl.h b/lldb/include/lldb/Core/StructuredDataImpl.h index e755c53aaa9f6..16dbc5263b285 100644 --- a/lldb/include/lldb/Core/StructuredDataImpl.h +++ b/lldb/include/lldb/Core/StructuredDataImpl.h @@ -80,7 +80,7 @@ class StructuredDataImpl { error.SetErrorString("No data to describe."); return error; } - m_data_sp->Dump(stream, true); + m_data_sp->GetDescription(stream); return error; } // Get the data's description. 
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h index 9f6300f4f115b..5420c0dcf8d5a 100644 --- a/lldb/include/lldb/Utility/StructuredData.h +++ b/lldb/include/lldb/Utility/StructuredData.h @@ -158,6 +158,12 @@ class StructuredData { Serialize(jso); } + virtual void GetDescription(lldb_private::Stream &s) const { + s.IndentMore(); + Dump(s, false); + s.IndentLess(); + } + private: lldb::StructuredDataType m_type; }; @@ -277,6 +283,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: typedef std::vector collection; collection m_items; @@ -295,6 +303,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: uint64_t m_value; }; @@ -312,6 +322,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: double m_value; }; @@ -329,6 +341,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: bool m_value; }; @@ -345,6 +359,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: std::string m_value; }; @@ -524,6 +540,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: typedef std::map collection; collection m_dict; @@ -538,6 +556,8 @@ class StructuredData { bool IsValid() const override { return false; } void Serialize(llvm::json::OStream &s) const override; + + void GetDescription(lldb_private::Stream &s) const override; }; class Generic : public Object { @@ -553,12 +573,15 @@ class StructuredData { void 
Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + private: void *m_object; }; static ObjectSP ParseJSON(const std::string &json_text); static ObjectSP ParseJSONFromFile(const FileSpec &file, Status &error); + static bool IsRecordType(const ObjectSP object_sp); }; } // namespace lldb_private diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 28a99ea3d94a5..92544c564e532 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -1537,8 +1537,9 @@ class CommandObjectProcessStatus : public CommandObjectParsed { StructuredData::DictionarySP crash_info_sp = *expected_crash_info; if (crash_info_sp) { + strm.EOL(); strm.PutCString("Extended Crash Information:\n"); - crash_info_sp->Dump(strm); + crash_info_sp->GetDescription(strm); } } diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp index 2e023344f3ddb..acc09289e6b98 100644 --- a/lldb/source/Utility/StructuredData.cpp +++ b/lldb/source/Utility/StructuredData.cpp @@ -50,6 +50,11 @@ StructuredData::ParseJSONFromFile(const FileSpec &input_spec, Status &error) { return StructuredData::ObjectSP(); } +bool StructuredData::IsRecordType(const ObjectSP object_sp) { + return object_sp->GetType() == lldb::eStructuredDataTypeArray || + object_sp->GetType() == lldb::eStructuredDataTypeDictionary; +} + static StructuredData::ObjectSP ParseJSONValue(json::Value &value) { if (json::Object *O = value.getAsObject()) return ParseJSONObject(O); @@ -175,3 +180,98 @@ void StructuredData::Null::Serialize(json::OStream &s) const { void StructuredData::Generic::Serialize(json::OStream &s) const { s.value(llvm::formatv("{0:X}", m_object)); } + +void StructuredData::Integer::GetDescription(lldb_private::Stream &s) const { + s.Printf("%" PRId64, static_cast(m_value)); +} + +void 
StructuredData::Float::GetDescription(lldb_private::Stream &s) const { + s.Printf("%f", m_value); +} + +void StructuredData::Boolean::GetDescription(lldb_private::Stream &s) const { + s.Printf(m_value ? "True" : "False"); +} + +void StructuredData::String::GetDescription(lldb_private::Stream &s) const { + s.Printf("%s", m_value.empty() ? "\"\"" : m_value.c_str()); +} + +void StructuredData::Array::GetDescription(lldb_private::Stream &s) const { + size_t index = 0; + size_t indentation_level = s.GetIndentLevel(); + for (const auto &item_sp : m_items) { + // Sanitize. + if (!item_sp) + continue; + + // Reset original indentation level. + s.SetIndentLevel(indentation_level); + s.Indent(); + + // Print key + s.Printf("[%zu]:", index++); + + // Return to new line and increase indentation if value is record type. + // Otherwise add spacing. + bool should_indent = IsRecordType(item_sp); + if (should_indent) { + s.EOL(); + s.IndentMore(); + } else { + s.PutChar(' '); + } + + // Print value and new line if now last pair. + item_sp->GetDescription(s); + if (item_sp != *(--m_items.end())) + s.EOL(); + + // Reset indentation level if it was incremented previously. + if (should_indent) + s.IndentLess(); + } +} + +void StructuredData::Dictionary::GetDescription(lldb_private::Stream &s) const { + size_t indentation_level = s.GetIndentLevel(); + for (const auto &pair : m_dict) { + // Sanitize. + if (pair.first.IsNull() || pair.first.IsEmpty() || !pair.second) + continue; + + // Reset original indentation level. + s.SetIndentLevel(indentation_level); + s.Indent(); + + // Print key. + s.Printf("%s:", pair.first.AsCString()); + + // Return to new line and increase indentation if value is record type. + // Otherwise add spacing. + bool should_indent = IsRecordType(pair.second); + if (should_indent) { + s.EOL(); + s.IndentMore(); + } else { + s.PutChar(' '); + } + + // Print value and new line if now last pair. 
+ pair.second->GetDescription(s); + if (pair != *(--m_dict.end())) + s.EOL(); + + // Reset indentation level if it was incremented previously. + if (should_indent) + s.IndentLess(); + } +} + +void StructuredData::Null::GetDescription(lldb_private::Stream &s) const { + s.Printf("NULL"); +} + +void StructuredData::Generic::GetDescription(lldb_private::Stream &s) const { + s.Printf("%p", m_object); +} diff --git a/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py b/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py index 30190e7c4df9b..659539c28a795 100644 --- a/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py +++ b/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py @@ -37,7 +37,9 @@ def test_cli(self): patterns=["Process .* launched: .*a.out"]) self.expect('process status --verbose', - patterns=["\"message\".*pointer being freed was not allocated"]) + patterns=["Extended Crash Information", + "crash-info annotations", + "pointer being freed was not allocated"]) @skipIfAsan # The test process intentionally hits a memory bug. 
diff --git a/lldb/unittests/Utility/CMakeLists.txt b/lldb/unittests/Utility/CMakeLists.txt index d697464600ff5..848a36215aa67 100644 --- a/lldb/unittests/Utility/CMakeLists.txt +++ b/lldb/unittests/Utility/CMakeLists.txt @@ -54,6 +54,10 @@ add_lldb_unittest(UtilityTests Support ) -add_unittest_inputs(UtilityTests +set(test_inputs StructuredData-basic.json + StructuredData-nested.json + StructuredData-full.json ) + +add_unittest_inputs(UtilityTests "${test_inputs}") diff --git a/lldb/unittests/Utility/Inputs/StructuredData-full.json b/lldb/unittests/Utility/Inputs/StructuredData-full.json new file mode 100644 index 0000000000000..4e4945cd6a280 --- /dev/null +++ b/lldb/unittests/Utility/Inputs/StructuredData-full.json @@ -0,0 +1,15 @@ +{ + "Array": [ + 3.14, + { + "key": "val" + } + ], + "Dictionary": { + "FalseBool": false + }, + "Integer": 1, + "Null": null, + "String": "value", + "TrueBool": true +} diff --git a/lldb/unittests/Utility/Inputs/StructuredData-nested.json b/lldb/unittests/Utility/Inputs/StructuredData-nested.json new file mode 100644 index 0000000000000..facf461bb6c1f --- /dev/null +++ b/lldb/unittests/Utility/Inputs/StructuredData-nested.json @@ -0,0 +1,14 @@ +{ + "my_dict": [ + { + "three": 3, + "two": 2 + }, + { + "four": { + "val": 4 + } + }, + 1 + ] +} diff --git a/lldb/unittests/Utility/StructuredDataTest.cpp b/lldb/unittests/Utility/StructuredDataTest.cpp index cb5e418cd958e..e732016fe43db 100644 --- a/lldb/unittests/Utility/StructuredDataTest.cpp +++ b/lldb/unittests/Utility/StructuredDataTest.cpp @@ -31,6 +31,73 @@ TEST(StructuredDataTest, StringDump) { } } +TEST(StructuredDataTest, GetDescriptionEmpty) { + Status status; + auto object_sp = StructuredData::ParseJSON("{}"); + ASSERT_NE(nullptr, object_sp); + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(0, S.GetSize()); +} + +TEST(StructuredDataTest, GetDescriptionBasic) { + Status status; + std::string input = GetInputFilePath("StructuredData-basic.json"); + auto object_sp = 
StructuredData::ParseJSONFromFile(FileSpec(input), status); + ASSERT_NE(nullptr, object_sp); + + const std::string expected = "[0]: 1\n" + "[1]: 2\n" + "[2]: 3"; + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(expected, S.GetString()); +} + +TEST(StructuredDataTest, GetDescriptionNested) { + Status status; + std::string input = GetInputFilePath("StructuredData-nested.json"); + auto object_sp = StructuredData::ParseJSONFromFile(FileSpec(input), status); + ASSERT_NE(nullptr, object_sp); + + const std::string expected = "my_dict:\n" + " [0]:\n" + " three: 3\n" + " two: 2\n" + " [1]:\n" + " four:\n" + " val: 4\n" + " [2]: 1"; + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(expected, S.GetString()); +} + +TEST(StructuredDataTest, GetDescriptionFull) { + Status status; + std::string input = GetInputFilePath("StructuredData-full.json"); + auto object_sp = StructuredData::ParseJSONFromFile(FileSpec(input), status); + ASSERT_NE(nullptr, object_sp); + + const std::string expected = "Array:\n" + " [0]: 3.140000\n" + " [1]:\n" + " key: val\n" + "Dictionary:\n" + " FalseBool: False\n" + "Integer: 1\n" + "Null: NULL\n" + "String: value\n" + "TrueBool: True"; + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(expected, S.GetString()); +} + TEST(StructuredDataTest, ParseJSONFromFile) { Status status; auto object_sp = StructuredData::ParseJSONFromFile( From 78d6e1d1d4b3b5c6bdd779256c915a8ac7148174 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 3 Nov 2022 14:27:28 -0700 Subject: [PATCH 191/516] [lldb/crashlog] Add support for Application Specific Backtraces & Information For an exception crashlog, the thread backtraces aren't usually very helpful and instead, developpers look at the "Application Specific Backtrace" that was generated by `objc_exception_throw`. 
LLDB could already parse and symbolicate these Application Specific Backtraces for regular text-based crashlogs, so this patch adds support to parse them in JSON crashlogs, and materialize them as a HistoryThread extending the crashed ScriptedThread. This patch also includes the Application Specific Information messages as part of the process extended crash information log. To do so, the ScriptedProcess Python interface has a new GetMetadata method that returns an arbitrary dictionary with data related to the process. rdar://93207586 Differential Revision: https://reviews.llvm.org/D126260 Signed-off-by: Med Ismail Bennani --- lldb/examples/python/crashlog.py | 35 +- .../crashlog_scripted_process.py | 59 ++- .../scripted_process/scripted_process.py | 22 +- .../Interpreter/ScriptedProcessInterface.h | 4 + lldb/include/lldb/Target/Process.h | 7 + .../Platform/MacOSX/PlatformDarwin.cpp | 53 ++- .../Plugins/Platform/MacOSX/PlatformDarwin.h | 4 + .../Process/scripted/ScriptedProcess.cpp | 13 + .../Process/scripted/ScriptedProcess.h | 4 +- .../Process/scripted/ScriptedThread.cpp | 13 + .../Plugins/Process/scripted/ScriptedThread.h | 2 + .../Python/ScriptedProcessPythonInterface.cpp | 11 + .../Python/ScriptedProcessPythonInterface.h | 2 + .../Python/ScriptedThreadPythonInterface.cpp | 11 + .../Python/ScriptedThreadPythonInterface.h | 2 + .../MacOSX/SystemRuntimeMacOSX.cpp | 41 ++ .../TestProcessCrashInfo.py | 2 +- .../Inputs/application_specific_info/asi.ips | 131 ++++++ .../Inputs/application_specific_info/asi.yaml | 392 ++++++++++++++++++ .../Inputs/application_specific_info/main.m | 13 + .../app_specific_backtrace_crashlog.test | 52 +++ 21 files changed, 847 insertions(+), 26 deletions(-) create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.ips create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.yaml create mode 100644 
lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/main.m create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 8aaf7c165684a..47250f3b350f1 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -462,6 +462,12 @@ def parse(self): self.parse_images(self.data['usedImages']) self.parse_main_image(self.data) self.parse_threads(self.data['threads']) + if 'asi' in self.data: + self.crashlog.asi = self.data['asi'] + if 'asiBacktraces' in self.data: + self.parse_app_specific_backtraces(self.data['asiBacktraces']) + if 'lastExceptionBacktrace' in self.data: + self.crashlog.asb = self.data['lastExceptionBacktrace'] self.parse_errors(self.data) thread = self.crashlog.threads[self.crashlog.crashed_thread_idx] reason = self.parse_crash_reason(self.data['exception']) @@ -573,6 +579,31 @@ def parse_threads(self, json_threads): self.crashlog.threads.append(thread) idx += 1 + def parse_asi_backtrace(self, thread, bt): + for line in bt.split('\n'): + frame_match = TextCrashLogParser.frame_regex.search(line) + if not frame_match: + print("error: can't parse application specific backtrace.") + return False + + (frame_id, frame_img_name, frame_addr, + frame_ofs) = frame_match.groups() + + thread.add_ident(frame_img_name) + if frame_img_name not in self.crashlog.idents: + self.crashlog.idents.append(frame_img_name) + thread.frames.append(self.crashlog.Frame(int(frame_id), int( + frame_addr, 0), frame_ofs)) + + return True + + def parse_app_specific_backtraces(self, json_app_specific_bts): + for idx, backtrace in enumerate(json_app_specific_bts): + thread = self.crashlog.Thread(idx, True) + thread.queue = "Application Specific Backtrace" + if self.parse_asi_backtrace(thread, backtrace): + self.crashlog.threads.append(thread) + def parse_thread_registers(self, json_thread_state, 
prefix=None): registers = dict() for key, state in json_thread_state.items(): @@ -1102,8 +1133,8 @@ def synchronous(debugger): run_options.SetEchoCommands(True) commands_stream = lldb.SBStream() - commands_stream.Print("process status\n") - commands_stream.Print("thread backtrace\n") + commands_stream.Print("process status --verbose\n") + commands_stream.Print("thread backtrace --extended true\n") error = debugger.SetInputString(commands_stream.GetData()) if error.Success(): debugger.RunCommandInterpreter(True, False, run_options, 0, False, True) diff --git a/lldb/examples/python/scripted_process/crashlog_scripted_process.py b/lldb/examples/python/scripted_process/crashlog_scripted_process.py index e64b9b7822af1..55c50917c9d67 100644 --- a/lldb/examples/python/scripted_process/crashlog_scripted_process.py +++ b/lldb/examples/python/scripted_process/crashlog_scripted_process.py @@ -18,6 +18,11 @@ def parse_crashlog(self): self.crashed_thread_idx = crash_log.crashed_thread_idx self.loaded_images = [] self.exception = crash_log.exception + self.app_specific_thread = None + if hasattr(crash_log, 'asi'): + self.metadata['asi'] = crash_log.asi + if hasattr(crash_log, 'asb'): + self.extended_thread_info = crash_log.asb def load_images(self, images): #TODO: Add to self.loaded_images and load images in lldb @@ -40,8 +45,23 @@ def load_images(self, images): for ident in thread.idents: load_images(self, crash_log.find_images_with_identifier(ident)) + if hasattr(thread, 'app_specific_backtrace') and thread.app_specific_backtrace: + # We don't want to include the Application Specific Backtrace + # Thread into the Scripted Process' Thread list. + # Instead, we will try to extract the stackframe pcs from the + # backtrace and inject that as the extended thread info. 
+ self.app_specific_thread = thread + continue + self.threads[thread.index] = CrashLogScriptedThread(self, None, thread) + + if self.app_specific_thread: + self.extended_thread_info = \ + CrashLogScriptedThread.resolve_stackframes(self.app_specific_thread, + self.addr_mask, + self.target) + def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData): super().__init__(target, args) @@ -71,6 +91,7 @@ def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData): self.pid = super().get_process_id() self.crashed_thread_idx = 0 self.exception = None + self.extended_thread_info = None self.parse_crashlog() def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo: @@ -103,6 +124,9 @@ def is_alive(self) -> bool: def get_scripted_thread_plugin(self): return CrashLogScriptedThread.__module__ + "." + CrashLogScriptedThread.__name__ + def get_process_metadata(self): + return self.metadata + class CrashLogScriptedThread(ScriptedThread): def create_register_ctx(self): if not self.has_crashed: @@ -120,6 +144,19 @@ def create_register_ctx(self): return self.register_ctx + def resolve_stackframes(thread, addr_mask, target): + frames = [] + for frame in thread.frames: + frame_pc = frame.pc & addr_mask + pc = frame_pc if frame.index == 0 or frame_pc == 0 else frame_pc - 1 + sym_addr = lldb.SBAddress() + sym_addr.SetLoadAddress(pc, target) + if not sym_addr.IsValid(): + continue + frames.append({"idx": frame.index, "pc": pc}) + return frames + + def create_stackframes(self): if not (self.scripted_process.load_all_images or self.has_crashed): return None @@ -127,14 +164,9 @@ def create_stackframes(self): if not self.backing_thread or not len(self.backing_thread.frames): return None - for frame in self.backing_thread.frames: - frame_pc = frame.pc & self.scripted_process.addr_mask - pc = frame_pc if frame.index == 0 or frame_pc == 0 else frame_pc - 1 - sym_addr = lldb.SBAddress() - sym_addr.SetLoadAddress(pc, self.target) - if not 
sym_addr.IsValid(): - continue - self.frames.append({"idx": frame.index, "pc": pc}) + self.frames = CrashLogScriptedThread.resolve_stackframes(self.backing_thread, + self.scripted_process.addr_mask, + self.target) return self.frames @@ -144,7 +176,10 @@ def __init__(self, process, args, crashlog_thread): self.backing_thread = crashlog_thread self.idx = self.backing_thread.index self.tid = self.backing_thread.id - self.name = self.backing_thread.name + if self.backing_thread.app_specific_backtrace: + self.name = "Application Specific Backtrace - " + str(self.idx) + else: + self.name = self.backing_thread.name self.queue = self.backing_thread.queue self.has_crashed = (self.scripted_process.crashed_thread_idx == self.idx) self.create_stackframes() @@ -168,3 +203,9 @@ def get_register_context(self) -> str: self.register_ctx = self.create_register_ctx() return struct.pack("{}Q".format(len(self.register_ctx)), *self.register_ctx.values()) + + def get_extended_info(self): + if (self.has_crashed): + self.extended_info = self.scripted_process.extended_thread_info + return self.extended_info + diff --git a/lldb/examples/python/scripted_process/scripted_process.py b/lldb/examples/python/scripted_process/scripted_process.py index 48966f8385cb0..43eb97dbd7723 100644 --- a/lldb/examples/python/scripted_process/scripted_process.py +++ b/lldb/examples/python/scripted_process/scripted_process.py @@ -18,6 +18,7 @@ class ScriptedProcess(metaclass=ABCMeta): stack_memory_dump = None loaded_images = None threads = None + metadata = None @abstractmethod def __init__(self, target, args): @@ -41,6 +42,7 @@ def __init__(self, target, args): self.args = args self.threads = {} self.loaded_images = [] + self.metadata = {} @abstractmethod def get_memory_region_containing_address(self, addr): @@ -138,7 +140,6 @@ def get_process_id(self): """ return 0 - def launch(self): """ Simulate the scripted process launch. 
@@ -191,6 +192,15 @@ def get_scripted_thread_plugin(self): """ return None + def get_process_metadata(self): + """ Get some metadata for the scripted process. + + Returns: + Dict: A dictionary containing metadata for the scripted process. + None is the process as no metadata. + """ + return self.metadata + class ScriptedThread(metaclass=ABCMeta): """ @@ -226,6 +236,7 @@ def __init__(self, scripted_process, args): self.register_info = None self.register_ctx = {} self.frames = [] + self.extended_info = [] if isinstance(scripted_process, ScriptedProcess): self.target = scripted_process.target @@ -334,6 +345,15 @@ def get_register_context(self): """ pass + def get_extended_info(self): + """ Get scripted thread extended information. + + Returns: + List: A list containing the extended information for the scripted process. + None is the thread as no extended information. + """ + return self.extended_info + ARM64_GPR = [ {'name': 'x0', 'bitsize': 64, 'offset': 0, 'encoding': 'uint', 'format': 'hex', 'set': 0, 'gcc': 0, 'dwarf': 0, 'generic': 'arg0', 'alt-name': 'arg0'}, {'name': 'x1', 'bitsize': 64, 'offset': 8, 'encoding': 'uint', 'format': 'hex', 'set': 0, 'gcc': 1, 'dwarf': 1, 'generic': 'arg1', 'alt-name': 'arg1'}, {'name': 'x2', 'bitsize': 64, 'offset': 16, 'encoding': 'uint', 'format': 'hex', 'set': 0, 'gcc': 2, 'dwarf': 2, 'generic': 'arg2', 'alt-name': 'arg2'}, diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h index 905623e575f71..164ec9b9dd605 100644 --- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h +++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h @@ -66,6 +66,8 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { return llvm::None; } + virtual StructuredData::DictionarySP GetMetadata() { return nullptr; } + protected: friend class ScriptedThread; virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() { @@ -99,6 +101,8 @@ 
class ScriptedThreadInterface : virtual public ScriptedInterface { virtual llvm::Optional GetRegisterContext() { return llvm::None; } + + virtual StructuredData::ArraySP GetExtendedInfo() { return nullptr; } }; } // namespace lldb_private diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 6975eb8029de0..b9995c2a44326 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -2423,6 +2423,13 @@ void PruneThreadPlans(); return Status("Not supported"); } + /// Fetch process defined metadata. + /// + /// \return + /// A StructuredDataSP object which, if non-empty, will contain the + /// information related to the process. + virtual StructuredData::DictionarySP GetMetadata() { return nullptr; } + size_t AddImageToken(lldb::addr_t image_ptr); lldb::addr_t GetImagePtrFromToken(size_t token) const; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 21b733a62bbbb..9d89148616be1 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -857,21 +857,20 @@ PlatformDarwin::ParseVersionBuildDir(llvm::StringRef dir) { llvm::Expected PlatformDarwin::FetchExtendedCrashInformation(Process &process) { - Log *log = GetLog(LLDBLog::Process); - - StructuredData::ArraySP annotations = ExtractCrashInfoAnnotations(process); - - if (!annotations || !annotations->GetSize()) { - LLDB_LOG(log, "Couldn't extract crash information annotations"); - return nullptr; - } - StructuredData::DictionarySP extended_crash_info = std::make_shared(); - extended_crash_info->AddItem("crash-info annotations", annotations); + StructuredData::ArraySP annotations = ExtractCrashInfoAnnotations(process); + if (annotations && annotations->GetSize()) + extended_crash_info->AddItem("Crash-Info Annotations", annotations); + + StructuredData::DictionarySP app_specific_info = + 
ExtractAppSpecificInfo(process); + if (app_specific_info && app_specific_info->GetSize()) + extended_crash_info->AddItem("Application Specific Information", + app_specific_info); - return extended_crash_info; + return extended_crash_info->GetSize() ? extended_crash_info : nullptr; } StructuredData::ArraySP @@ -978,6 +977,38 @@ PlatformDarwin::ExtractCrashInfoAnnotations(Process &process) { return array_sp; } +StructuredData::DictionarySP +PlatformDarwin::ExtractAppSpecificInfo(Process &process) { + StructuredData::DictionarySP metadata_sp = process.GetMetadata(); + + if (!metadata_sp || !metadata_sp->GetSize() || !metadata_sp->HasKey("asi")) + return {}; + + StructuredData::Dictionary *asi; + if (!metadata_sp->GetValueForKeyAsDictionary("asi", asi)) + return {}; + + StructuredData::DictionarySP dict_sp = + std::make_shared(); + + auto flatten_asi_dict = [&dict_sp](ConstString key, + StructuredData::Object *val) -> bool { + if (!val) + return false; + + StructuredData::Array *arr = val->GetAsArray(); + if (!arr || !arr->GetSize()) + return false; + + dict_sp->AddItem(key.AsCString(), arr->GetItemAtIndex(0)); + return true; + }; + + asi->ForEach(flatten_asi_dict); + + return dict_sp; +} + void PlatformDarwin::AddClangModuleCompilationOptionsForSDKType( Target *target, std::vector &options, XcodeSDK::Type sdk_type) { const std::vector apple_arguments = { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h index 334410e91b4a2..36b52f4ca9eb3 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h @@ -154,6 +154,10 @@ class PlatformDarwin : public PlatformPOSIX { /// \b nullptr if process has no crash information annotations. StructuredData::ArraySP ExtractCrashInfoAnnotations(Process &process); + /// Extract the `Application Specific Information` messages from a crash + /// report. 
+ StructuredData::DictionarySP ExtractAppSpecificInfo(Process &process); + void ReadLibdispatchOffsetsAddress(Process *process); void ReadLibdispatchOffsets(Process *process); diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index 174c00e985595..e31d8bb769f85 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -485,6 +485,19 @@ ScriptedProcess::GetLoadedDynamicLibrariesInfos() { return loaded_images_sp; } +lldb_private::StructuredData::DictionarySP ScriptedProcess::GetMetadata() { + CheckInterpreterAndScriptObject(); + + StructuredData::DictionarySP metadata_sp = GetInterface().GetMetadata(); + + Status error; + if (!metadata_sp || !metadata_sp->GetSize()) + return ScriptedInterface::ErrorWithMessage( + LLVM_PRETTY_FUNCTION, "No metadata.", error); + + return metadata_sp; +} + ScriptedProcessInterface &ScriptedProcess::GetInterface() const { return m_interpreter->GetScriptedProcessInterface(); } diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h index 465ef7b64ecd7..e8f8dd4a965d5 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h @@ -59,8 +59,6 @@ class ScriptedProcess : public Process { llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } - SystemRuntime *GetSystemRuntime() override { return nullptr; } - Status DoLoadCore() override; Status DoLaunch(Module *exe_module, ProcessLaunchInfo &launch_info) override; @@ -88,6 +86,8 @@ class ScriptedProcess : public Process { lldb_private::StructuredData::ObjectSP GetLoadedDynamicLibrariesInfos() override; + lldb_private::StructuredData::DictionarySP GetMetadata() override; + protected: ScriptedProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp, const 
ScriptedProcess::ScriptedProcessInfo &launch_info, diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp index b19331b5b1082..f13cdd3a4c33c 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -343,3 +343,16 @@ std::shared_ptr ScriptedThread::GetDynamicRegisterInfo() { return m_register_info_sp; } + +StructuredData::ObjectSP ScriptedThread::FetchThreadExtendedInfo() { + CheckInterpreterAndScriptObject(); + + Status error; + StructuredData::ArraySP extended_info_sp = GetInterface()->GetExtendedInfo(); + + if (!extended_info_sp || !extended_info_sp->GetSize()) + return ScriptedInterface::ErrorWithMessage( + LLVM_PRETTY_FUNCTION, "No extended information found", error); + + return extended_info_sp; +} diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h index 959f498edf240..cd224d60ceef8 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h @@ -58,6 +58,8 @@ class ScriptedThread : public lldb_private::Thread { void ClearStackFrames() override; + StructuredData::ObjectSP FetchThreadExtendedInfo() override; + private: void CheckInterpreterAndScriptObject() const; lldb::ScriptedThreadInterfaceSP GetInterface() const; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp index 576bf69c9258e..ffce8c468cab8 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp @@ -177,4 +177,15 @@ ScriptedProcessPythonInterface::CreateScriptedThreadInterface() { return std::make_shared(m_interpreter); } +StructuredData::DictionarySP 
ScriptedProcessPythonInterface::GetMetadata() { + Status error; + StructuredData::DictionarySP dict = + Dispatch("get_process_metadata", error); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + return {}; + + return dict; +} + #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h index 7f458b1dd9bdb..622d225853040 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h @@ -57,6 +57,8 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface, llvm::Optional GetScriptedThreadPluginName() override; + StructuredData::DictionarySP GetMetadata() override; + private: lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override; }; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp index 3ff592fb83cd7..d52a9c2d81f97 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp @@ -144,4 +144,15 @@ ScriptedThreadPythonInterface::GetRegisterContext() { return obj->GetAsString()->GetValue().str(); } +StructuredData::ArraySP ScriptedThreadPythonInterface::GetExtendedInfo() { + Status error; + StructuredData::ArraySP arr = + Dispatch("get_extended_info", error); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, error)) + return {}; + + return arr; +} + #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h index 59bb182ae3f3d..63ce1c1ab288f 100644 --- 
a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h @@ -42,6 +42,8 @@ class ScriptedThreadPythonInterface : public ScriptedThreadInterface, StructuredData::DictionarySP GetRegisterInfo() override; llvm::Optional GetRegisterContext() override; + + StructuredData::ArraySP GetExtendedInfo() override; }; } // namespace lldb_private diff --git a/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp b/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp index 7a56264f87c9b..d4d164a77d732 100644 --- a/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp +++ b/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp @@ -502,6 +502,46 @@ ThreadSP SystemRuntimeMacOSX::GetExtendedBacktraceThread(ThreadSP real_thread, m_page_to_free_size = ret.item_buffer_size; } } + } else if (type == "Application Specific Backtrace") { + StructuredData::ObjectSP thread_extended_sp = + real_thread->GetExtendedInfo(); + + if (!thread_extended_sp) + return {}; + + StructuredData::Array *thread_extended_info = + thread_extended_sp->GetAsArray(); + + if (!thread_extended_info || !thread_extended_info->GetSize()) + return {}; + + std::vector app_specific_backtrace_pcs; + + auto extract_frame_pc = + [&app_specific_backtrace_pcs](StructuredData::Object *obj) -> bool { + if (!obj) + return false; + + StructuredData::Dictionary *dict = obj->GetAsDictionary(); + if (!dict) + return false; + + lldb::addr_t pc = LLDB_INVALID_ADDRESS; + if (!dict->GetValueForKeyAsInteger("pc", pc)) + return false; + + app_specific_backtrace_pcs.push_back(pc); + + return pc != LLDB_INVALID_ADDRESS; + }; + + if (!thread_extended_info->ForEach(extract_frame_pc)) + return {}; + + originating_thread_sp = + std::make_shared(*m_process, real_thread->GetIndexID(), + app_specific_backtrace_pcs, true); + originating_thread_sp->SetQueueName(type.AsCString()); } return 
originating_thread_sp; } @@ -674,6 +714,7 @@ const std::vector & SystemRuntimeMacOSX::GetExtendedBacktraceTypes() { if (m_types.size() == 0) { m_types.push_back(ConstString("libdispatch")); + m_types.push_back(ConstString("Application Specific Backtrace")); // We could have pthread as another type in the future if we have a way of // gathering that information & it's useful to distinguish between them. } diff --git a/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py b/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py index 659539c28a795..c0d380aca2849 100644 --- a/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py +++ b/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py @@ -38,7 +38,7 @@ def test_cli(self): self.expect('process status --verbose', patterns=["Extended Crash Information", - "crash-info annotations", + "Crash-Info Annotations", "pointer being freed was not allocated"]) diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.ips b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.ips new file mode 100644 index 0000000000000..8d151a3be0370 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.ips @@ -0,0 +1,131 @@ +{"app_name":"asi","timestamp":"2022-10-07 11:31:53.00 -0700","app_version":"","slice_uuid":"2cee52c2-2d9c-3e64-bdd0-c43ccd1b37ec","build_version":"","platform":1,"share_with_app_devs":0,"is_first_party":1,"bug_type":"309","os_version":"macOS 13.0","roots_installed":0,"incident_id":"E62DF457-8BBC-4E92-AECA-11D1B55246E3","name":"asi"} +{ + "uptime" : 90000, + "procRole" : "Unspecified", + "version" : 2, + "userID" : 501, + "deployVersion" : 210, + "modelCode" : "Mac13,1", + "coalitionID" : 495, + "osVersion" : { + "train" : "macOS 13.0", + "build" : "", + "releaseType" : "" + }, + "captureTime" : "2022-10-07 11:31:52.6211 
-0700", + "incident" : "E62DF457-8BBC-4E92-AECA-11D1B55246E3", + "pid" : 96535, + "translated" : false, + "cpuType" : "ARM-64", + "roots_installed" : 0, + "bug_type" : "309", + "procLaunch" : "2022-10-07 11:31:52.4969 -0700", + "procStartAbsTime" : 2167631132529, + "procExitAbsTime" : 2167634104978, + "procName" : "asi", + "procPath" : "\/Users\/USER\/*\/asi", + "parentProc" : "zsh", + "parentPid" : 96199, + "coalitionName" : "com.apple.Terminal", + "crashReporterKey" : "533C17C1-DBB1-4134-1FDE-68346F18AAA2", + "responsiblePid" : 615, + "responsibleProc" : "Terminal", + "wakeTime" : 1351, + "sleepWakeUUID" : "AD23E0A0-A4A5-4B6B-925F-2FC3665C17BF", + "sip" : "enabled", + "exception" : {"codes":"0x0000000000000000, 0x0000000000000000","rawCodes":[0,0],"type":"EXC_CRASH","signal":"SIGABRT"}, + "asi" : {"CoreFoundation":["*** Terminating app due to uncaught exception 'NSRangeException', reason: '*** __boundsFail: index 10 beyond bounds [0 .. 3]'"],"libsystem_c.dylib":["abort() called"],"libc++abi.dylib":["terminating with uncaught exception of type NSException"]}, + "asiBacktraces" : ["0 CoreFoundation 0x00000001a0a58418 __exceptionPreprocess + 176\n1 libobjc.A.dylib 0x00000001a05a2ea8 objc_exception_throw + 60\n2 CoreFoundation 0x00000001a0b3dcc4 -[__NSCFString characterAtIndex:].cold.1 + 0\n3 CoreFoundation 0x00000001a0b46af4 -[__NSArrayI getObjects:range:].cold.1 + 0\n4 CoreFoundation 0x00000001a09a12a4 __CFPropertyListIsArrayPlistAux + 0\n5 asi 0x00000001047e3ed0 main + 128\n6 dyld 0x00000001a05d3e50 start + 2544"], + "extMods" : {"caller":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"system":{"thread_create":0,"thread_set_state":4,"task_for_pid":4},"targeted":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"warnings":0}, + "lastExceptionBacktrace" : 
[{"imageOffset":1033228,"symbol":"__exceptionPreprocess","symbolLocation":164,"imageIndex":5},{"imageOffset":110248,"symbol":"objc_exception_throw","symbolLocation":60,"imageIndex":4},{"imageOffset":1973444,"symbol":"-[__NSCFString characterAtIndex:].cold.1","symbolLocation":0,"imageIndex":5},{"imageOffset":2009844,"symbol":"-[__NSArrayI getObjects:range:].cold.1","symbolLocation":0,"imageIndex":5},{"imageOffset":283300,"symbol":"__CFPropertyListIsArrayPlistAux","symbolLocation":0,"imageIndex":5},{"imageOffset":16080,"symbol":"main","symbolLocation":128,"imageIndex":6},{"imageOffset":24144,"symbol":"start","symbolLocation":2544,"imageIndex":7}], + "faultingThread" : 0, + "threads" : [{"triggered":true,"id":1767667,"threadState":{"x":[{"value":0},{"value":0},{"value":0},{"value":0},{"value":6988476661},{"value":6096540848},{"value":110},{"value":512},{"value":502518818286880576},{"value":502518810403597248},{"value":512},{"value":11},{"value":11},{"value":2095104},{"value":2043},{"value":2195963912},{"value":328},{"value":8604857144},{"value":0},{"value":6},{"value":8522738816,"symbolLocation":0,"symbol":"_main_thread"},{"value":259},{"value":8522739040,"symbolLocation":224,"symbol":"_main_thread"},{"value":105553117118464},{"value":8528036928,"symbolLocation":0,"symbol":"gProcessInfo"},{"value":0},{"value":0},{"value":0},{"value":0}],"flavor":"ARM_THREAD_STATE64","lr":{"value":6988750060},"cpsr":{"value":1073745920},"fp":{"value":6096540704},"sp":{"value":6096540672},"esr":{"value":1442840704,"description":" Address size 
fault"},"pc":{"value":6988526116,"matchesCrashFrame":1},"far":{"value":5452680264}},"queue":"com.apple.main-thread","frames":[{"imageOffset":37412,"symbol":"__pthread_kill","symbolLocation":8,"imageIndex":0},{"imageOffset":27884,"symbol":"pthread_kill","symbolLocation":288,"imageIndex":1},{"imageOffset":496328,"symbol":"abort","symbolLocation":180,"imageIndex":2},{"imageOffset":72472,"symbol":"abort_message","symbolLocation":132,"imageIndex":3},{"imageOffset":6668,"symbol":"demangling_terminate_handler()","symbolLocation":336,"imageIndex":3},{"imageOffset":145252,"symbol":"_objc_terminate()","symbolLocation":144,"imageIndex":4},{"imageOffset":69300,"symbol":"std::__terminate(void (*)())","symbolLocation":20,"imageIndex":3},{"imageOffset":80940,"symbol":"__cxxabiv1::failed_throw(__cxxabiv1::__cxa_exception*)","symbolLocation":36,"imageIndex":3},{"imageOffset":80856,"symbol":"__cxa_throw","symbolLocation":140,"imageIndex":3},{"imageOffset":110600,"symbol":"objc_exception_throw","symbolLocation":412,"imageIndex":4},{"imageOffset":1973444,"symbol":"_CFThrowFormattedException","symbolLocation":108,"imageIndex":5},{"imageOffset":2009844,"symbol":"__boundsFail","symbolLocation":92,"imageIndex":5},{"imageOffset":283300,"symbol":"-[__NSArrayI objectAtIndex:]","symbolLocation":60,"imageIndex":5},{"imageOffset":16080,"symbol":"main","symbolLocation":128,"imageIndex":6},{"imageOffset":24144,"symbol":"start","symbolLocation":2544,"imageIndex":7}]}], + "usedImages" : [ + { + "source" : "P", + "arch" : "arm64e", + "base" : 6988488704, + "size" : 233468, + "uuid" : "15147572-bf8d-359e-a6bb-97f4489e7f78", + "path" : "\/usr\/lib\/system\/libsystem_kernel.dylib", + "name" : "libsystem_kernel.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6988722176, + "size" : 53244, + "uuid" : "19a65066-147a-37e1-be56-bd78821ef285", + "path" : "\/usr\/lib\/system\/libsystem_pthread.dylib", + "name" : "libsystem_pthread.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + 
"base" : 6987440128, + "size" : 528372, + "uuid" : "cd2fafb3-239f-3929-9b9d-ed1768c25159", + "path" : "\/usr\/lib\/system\/libsystem_c.dylib", + "name" : "libsystem_c.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6988390400, + "size" : 98300, + "uuid" : "88025d90-bb66-34a8-8628-91ec5b3fb900", + "path" : "\/usr\/lib\/libc++abi.dylib", + "name" : "libc++abi.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6985121792, + "size" : 286112, + "uuid" : "9a019b6d-aeb6-3a3e-9c74-717c18dd5d43", + "path" : "\/usr\/lib\/libobjc.A.dylib", + "name" : "libobjc.A.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6989135872, + "CFBundleShortVersionString" : "6.9", + "CFBundleIdentifier" : "com.apple.CoreFoundation", + "size" : 5079040, + "uuid" : "0cb1d6ec-b4ee-33d5-9828-29db31cad6fc", + "path" : "\/System\/Library\/Frameworks\/CoreFoundation.framework\/Versions\/A\/CoreFoundation", + "name" : "CoreFoundation", + "CFBundleVersion" : "1953.1" + }, + { + "source" : "P", + "arch" : "arm64", + "base" : 4370333696, + "size" : 16384, + "uuid" : "2cee52c2-2d9c-3e64-bdd0-c43ccd1b37ec", + "path" : "\/Users\/USER\/*\/asi", + "name" : "asi" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6985408512, + "size" : 566452, + "uuid" : "0d973234-ed2d-3a07-889a-46b424e29ae0", + "path" : "\/usr\/lib\/dyld", + "name" : "dyld" + } +], + "sharedCache" : { + "base" : 6984761344, + "size" : 3405660160, + "uuid" : "5fe7ffdc-ba32-33ba-8827-d3d9094c6bc3" +}, + "vmSummary" : "ReadOnly portion of Libraries: Total=861.7M resident=0K(0%) swapped_out_or_unallocated=861.7M(100%)\nWritable regions: Total=666.4M written=0K(0%) resident=0K(0%) swapped_out=0K(0%) unallocated=666.4M(100%)\n\n VIRTUAL REGION \nREGION TYPE SIZE COUNT (non-coalesced) \n=========== ======= ======= \nActivity Tracing 256K 1 \nKernel Alloc Once 32K 1 \nMALLOC 154.2M 14 \nMALLOC guard page 96K 5 \nMALLOC_MEDIUM (reserved) 120.0M 1 reserved VM address space 
(unallocated)\nMALLOC_NANO (reserved) 384.0M 1 reserved VM address space (unallocated)\nSTACK GUARD 56.0M 1 \nStack 8176K 1 \n__AUTH 307K 58 \n__AUTH_CONST 3560K 142 \n__DATA 1494K 136 \n__DATA_CONST 3988K 144 \n__DATA_DIRTY 361K 58 \n__LINKEDIT 763.4M 2 \n__OBJC_CONST 289K 36 \n__OBJC_RO 65.1M 1 \n__OBJC_RW 1983K 1 \n__TEXT 98.3M 151 \ndyld private memory 256K 1 \nshared memory 80K 4 \n=========== ======= ======= \nTOTAL 1.6G 759 \nTOTAL, minus reserved VM space 1.1G 759 \n", + "legacyInfo" : { + "threadTriggered" : { + "queue" : "com.apple.main-thread" + } +} +} diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.yaml b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.yaml new file mode 100644 index 0000000000000..31042daadd8a9 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.yaml @@ -0,0 +1,392 @@ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 21 + sizeofcmds: 1864 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 552 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 6 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100003E50 + size: 172 + offset: 0x3E50 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 
FF8301D1FD7B05A9FD43019108008052A8431EB8BFC31FB8A0831FB8A1031FF829000094E11340F9E01700F9280000B0000940F9E8030091090000B029010191090100F9090000B029810191090500F9090000B029010291090900F91F0D00F9020000B0428000911A000094E11340F9A0831EF8A0835EF8420180D21D000094E8030091000100F9000000B00080029107000094E01740F908000094A0435EB8FD7B45A9FF830191C0035FD6 + - sectname: __stubs + segname: __TEXT + addr: 0x100003EFC + size: 36 + offset: 0x3EFC + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000408 + reserved1: 0x0 + reserved2: 0xC + reserved3: 0x0 + content: 100000B0100240F900021FD6100000B0100640F900021FD6100000B0100A40F900021FD6 + - sectname: __objc_stubs + segname: __TEXT + addr: 0x100003F20 + size: 64 + offset: 0x3F20 + align: 5 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 210000B0210040F9100000B0100E40F900021FD6200020D4200020D4200020D4210000B0210440F9100000B0100E40F900021FD6200020D4200020D4200020D4 + - sectname: __cstring + segname: __TEXT + addr: 0x100003F60 + size: 26 + offset: 0x3F60 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x2 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 4A696D004A61736F6E004A6F6E61730049736D61696C00254000 + - sectname: __objc_methname + segname: __TEXT + addr: 0x100003F7A + size: 34 + offset: 0x3F7A + align: 1 + reloff: 0x0 + nreloc: 0 + flags: 0x2 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 6F626A6563744174496E6465783A00006172726179576974684F626A656374733A00 + - sectname: __unwind_info + segname: __TEXT + addr: 0x100003F9C + size: 72 + offset: 0x3F9C + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 010000001C000000000000001C000000000000001C00000002000000503E00003400000034000000FD3E00000000000034000000030000000C000100100001000000000000000004 + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: __DATA_CONST + vmaddr: 4294983680 + vmsize: 16384 + fileoff: 16384 + filesize: 16384 + maxprot: 3 + 
initprot: 3 + nsects: 3 + flags: 16 + Sections: + - sectname: __got + segname: __DATA_CONST + addr: 0x100004000 + size: 32 + offset: 0x4000 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x6 + reserved1: 0x3 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000000001080010000000000108002000000000010800300000000001080' + - sectname: __cfstring + segname: __DATA_CONST + addr: 0x100004020 + size: 160 + offset: 0x4020 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 0400000000002080C807000000000000603F00000000200003000000000000000400000000002080C807000000000000643F00000000200005000000000000000400000000002080C8070000000000006A3F00000000200005000000000000000400000000002080C807000000000000703F00000000200006000000000000000400000000002080C807000000000000773F0000000000000200000000000000 + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x1000040C0 + size: 8 + offset: 0x40C0 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000040000000' + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __DATA + vmaddr: 4295000064 + vmsize: 16384 + fileoff: 32768 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 2 + flags: 0 + Sections: + - sectname: __objc_selrefs + segname: __DATA + addr: 0x100008000 + size: 16 + offset: 0x8000 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x10000005 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 8A3F0000000010007A3F000000001000 + - sectname: __objc_classrefs + segname: __DATA + addr: 0x100008010 + size: 8 + offset: 0x8010 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0500000000000080' + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4295016448 + vmsize: 16384 + fileoff: 49152 + filesize: 1264 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_CHAINED_FIXUPS + cmdsize: 16 + dataoff: 49152 + 
datasize: 264 + - cmd: LC_DYLD_EXPORTS_TRIE + cmdsize: 16 + dataoff: 49416 + datasize: 48 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 49472 + nsyms: 10 + stroff: 49664 + strsize: 224 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 2 + iextdefsym: 2 + nextdefsym: 2 + iundefsym: 4 + nundefsym: 6 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 49632 + nindirectsyms: 7 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + Content: '/usr/lib/dyld' + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: 2CEE52C2-2D9C-3E64-BDD0-C43CCD1B37EC + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 851968 + sdk: 851968 + ntools: 1 + Tools: + - tool: 3 + version: 55836672 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 15952 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 96 + dylib: + name: 24 + timestamp: 2 + current_version: 127992064 + compatibility_version: 19660800 + Content: '/System/Library/Frameworks/Foundation.framework/Versions/C/Foundation' + ZeroPadBytes: 3 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 86441984 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_LOAD_DYLIB + cmdsize: 104 + dylib: + name: 24 + timestamp: 2 + current_version: 127992064 + compatibility_version: 9830400 + Content: '/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation' + ZeroPadBytes: 3 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 14942208 + compatibility_version: 65536 + Content: '/usr/lib/libobjc.A.dylib' + ZeroPadBytes: 8 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 49464 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 49472 + datasize: 0 + - cmd: LC_CODE_SIGNATURE + cmdsize: 16 + dataoff: 49888 + datasize: 528 
+LinkEditData: + NameList: + - n_strx: 156 + n_type: 0x1E + n_sect: 3 + n_desc: 0 + n_value: 4294983456 + - n_strx: 188 + n_type: 0x1E + n_sect: 3 + n_desc: 0 + n_value: 4294983488 + - n_strx: 2 + n_type: 0xF + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294983248 + - n_strx: 28 + n_type: 0x1 + n_sect: 0 + n_desc: 256 + n_value: 0 + - n_strx: 35 + n_type: 0x1 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 57 + n_type: 0x1 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 91 + n_type: 0x1 + n_sect: 0 + n_desc: 1024 + n_value: 0 + - n_strx: 116 + n_type: 0x1 + n_sect: 0 + n_desc: 1024 + n_value: 0 + - n_strx: 142 + n_type: 0x1 + n_sect: 0 + n_desc: 1024 + n_value: 0 + StringTable: + - ' ' + - __mh_execute_header + - _main + - _NSLog + - '_OBJC_CLASS_$_NSArray' + - ___CFConstantStringClassReference + - _objc_autoreleasePoolPop + - _objc_autoreleasePoolPush + - _objc_msgSend + - '_objc_msgSend$arrayWithObjects:' + - '_objc_msgSend$objectAtIndex:' + - '' + - '' + - '' + - '' + - '' + - '' + - '' + IndirectSymbols: [ 0x4, 0x7, 0x8, 0x4, 0x7, 0x8, 0x9 ] + FunctionStarts: [ 0x3E50 ] +... diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/main.m b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/main.m new file mode 100644 index 0000000000000..e6745a81333d7 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/main.m @@ -0,0 +1,13 @@ +#include + +int main(int argc, char *argv[]) { + @autoreleasepool { + + NSArray *crew = [NSArray arrayWithObjects:@"Jim", @"Jason", @"Jonas", @"Ismail", nil]; + + // This will throw an exception. 
+ NSLog(@"%@", [crew objectAtIndex:10]); + } + + return 0; +} diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test new file mode 100644 index 0000000000000..266b1b4ee404d --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test @@ -0,0 +1,52 @@ +# REQUIRES: python, native && target-aarch64 && system-darwin + +# RUN: mkdir -p %t.dir +# RUN: yaml2obj %S/Inputs/application_specific_info/asi.yaml > %t.dir/asi +# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \ +# RUN: -o 'crashlog -a -i -t %t.dir/asi %S/Inputs/application_specific_info/asi.ips' \ +# RUN: -o "thread list" -o "bt all" 2>&1 | FileCheck %s + +# CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands + +# CHECK: (lldb) process status --verbose +# CHECK-NEXT: Process 96535 stopped +# CHECK-NEXT: * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_CRASH (code=0, subcode=0x0) +# CHECK-NEXT: frame #0: 0x00000001a08c7224{{.*}}[artificial] +# CHECK: Extended Crash Information: +# CHECK: Application Specific Information: +# CHECK-NEXT: CoreFoundation: *** Terminating app due to uncaught exception 'NSRangeException', reason: '*** __boundsFail: index 10 beyond bounds [0 .. 
3]' +# CHECK-NEXT: libc++abi.dylib: terminating with uncaught exception of type NSException +# CHECK-NEXT: libsystem_c.dylib: abort() called + + +# CHECK: (lldb) thread backtrace --extended true +# CHECK-NEXT: * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_CRASH (code=0, subcode=0x0) +# CHECK-NEXT: * frame #0: 0x00000001a08c7224{{.*}}[artificial] +# CHECK-NEXT: frame #1: 0x00000001a08fdceb{{.*}}[artificial] +# CHECK-NEXT: frame #2: 0x00000001a08372c7{{.*}}[artificial] +# CHECK-NEXT: frame #3: 0x00000001a08b7b17{{.*}}[artificial] +# CHECK-NEXT: frame #4: 0x00000001a08a7a0b{{.*}}[artificial] +# CHECK-NEXT: frame #5: 0x00000001a05ab763{{.*}}[artificial] +# CHECK-NEXT: frame #6: 0x00000001a08b6eb3{{.*}}[artificial] +# CHECK-NEXT: frame #7: 0x00000001a08b9c2b{{.*}}[artificial] +# CHECK-NEXT: frame #8: 0x00000001a08b9bd7{{.*}}[artificial] +# CHECK-NEXT: frame #9: 0x00000001a05a3007{{.*}}[artificial] +# CHECK-NEXT: frame #10: 0x00000001a0b3dcc3{{.*}}[artificial] +# CHECK-NEXT: frame #11: 0x00000001a0b46af3{{.*}}[artificial] +# CHECK-NEXT: frame #12: 0x00000001a09a12a3{{.*}}[artificial] +# CHECK-NEXT: frame #13: 0x00000001047e3ecf asi`main{{.*}}[artificial] +# CHECK-NEXT: frame #14: 0x00000001a05d3e4f{{.*}}[artificial] + +# CHECK: thread #4294967295: tid = 0x0001, 0x00000001a0a58418{{.*}}, queue = 'Application Specific Backtrace' +# CHECK-NEXT: frame #0: 0x00000001a0a58418{{.*}} +# CHECK-NEXT: frame #1: 0x00000001a05a2ea7{{.*}} +# CHECK-NEXT: frame #2: 0x00000001a0b3dcc3{{.*}} +# CHECK-NEXT: frame #3: 0x00000001a0b46af3{{.*}} +# CHECK-NEXT: frame #4: 0x00000001a09a12a3{{.*}} +# CHECK-NEXT: frame #5: 0x00000001047e3ecf asi`main{{.*}} +# CHECK-NEXT: frame #6: 0x00000001a05d3e4f dyld`start{{.*}} + + +# CHECK: (lldb) thread list +# CHECK-NEXT: Process 96535 stopped +# CHECK-NEXT: * thread #1: tid = 0x1af8f3, 0x00000001a08c7224{{.*}}, queue = 'com.apple.main-thread', stop reason = EXC_CRASH (code=0, subcode=0x0) From 3350d5574864442d7c4120af25404762a840da00 Mon 
Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Thu, 3 Nov 2022 14:42:34 -0700 Subject: [PATCH 192/516] [lldb/Plugins] Use default initializers for StructuredData::*SP (NFC) This patch replaces the ScriptedProcessInterface getters to return default initializers for StructureData shared pointers instead of returning a null pointer. Differential Revision: https://reviews.llvm.org/D137359 Signed-off-by: Med Ismail Bennani --- .../Interpreter/ScriptedProcessInterface.h | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h index 164ec9b9dd605..2795c2f487dff 100644 --- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h +++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h @@ -24,7 +24,7 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj = nullptr) override { - return nullptr; + return {}; } virtual Status Launch() { return Status("ScriptedProcess did not launch"); } @@ -41,22 +41,22 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { return {}; } - virtual StructuredData::DictionarySP GetThreadsInfo() { return nullptr; } + virtual StructuredData::DictionarySP GetThreadsInfo() { return {}; } virtual StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) { - return nullptr; + return {}; } virtual StructuredData::DictionarySP GetRegistersForThread(lldb::tid_t tid) { - return nullptr; + return {}; } virtual lldb::DataExtractorSP ReadMemoryAtAddress(lldb::addr_t address, size_t size, Status &error) { - return nullptr; + return {}; } - virtual StructuredData::ArraySP GetLoadedImages() { return nullptr; } + virtual StructuredData::ArraySP GetLoadedImages() { return {}; } virtual lldb::pid_t GetProcessID() { return 
LLDB_INVALID_PROCESS_ID; } @@ -66,12 +66,12 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { return llvm::None; } - virtual StructuredData::DictionarySP GetMetadata() { return nullptr; } + virtual StructuredData::DictionarySP GetMetadata() { return {}; } protected: friend class ScriptedThread; virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() { - return nullptr; + return {}; } }; @@ -81,7 +81,7 @@ class ScriptedThreadInterface : virtual public ScriptedInterface { CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj = nullptr) override { - return nullptr; + return {}; } virtual lldb::tid_t GetThreadID() { return LLDB_INVALID_THREAD_ID; } @@ -92,17 +92,17 @@ class ScriptedThreadInterface : virtual public ScriptedInterface { virtual llvm::Optional GetQueue() { return llvm::None; } - virtual StructuredData::DictionarySP GetStopReason() { return nullptr; } + virtual StructuredData::DictionarySP GetStopReason() { return {}; } - virtual StructuredData::ArraySP GetStackFrames() { return nullptr; } + virtual StructuredData::ArraySP GetStackFrames() { return {}; } - virtual StructuredData::DictionarySP GetRegisterInfo() { return nullptr; } + virtual StructuredData::DictionarySP GetRegisterInfo() { return {}; } virtual llvm::Optional GetRegisterContext() { return llvm::None; } - virtual StructuredData::ArraySP GetExtendedInfo() { return nullptr; } + virtual StructuredData::ArraySP GetExtendedInfo() { return {}; } }; } // namespace lldb_private From 0309081e1f4b564693dd4e4d3c7b1d700780c62b Mon Sep 17 00:00:00 2001 From: George Hu Date: Wed, 2 Nov 2022 12:19:22 -0700 Subject: [PATCH 193/516] Override CalculateFrameVariableError in SymbolFileOnDemand Differential Revision: https://reviews.llvm.org/D137284 --- lldb/include/lldb/Symbol/SymbolFileOnDemand.h | 3 + lldb/source/Symbol/SymbolFileOnDemand.cpp | 9 +++ 
.../variables/TestVSCode_variables.py | 73 ++++++++++++------- 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/lldb/include/lldb/Symbol/SymbolFileOnDemand.h b/lldb/include/lldb/Symbol/SymbolFileOnDemand.h index 05708395687f2..a215c7e32b26a 100644 --- a/lldb/include/lldb/Symbol/SymbolFileOnDemand.h +++ b/lldb/include/lldb/Symbol/SymbolFileOnDemand.h @@ -117,6 +117,9 @@ class SymbolFileOnDemand : public lldb_private::SymbolFile { lldb::SymbolContextItem resolve_scope, lldb_private::SymbolContext &sc) override; + lldb_private::Status + CalculateFrameVariableError(lldb_private::StackFrame &frame) override; + uint32_t ResolveSymbolContext( const lldb_private::SourceLocationSpec &src_location_spec, lldb::SymbolContextItem resolve_scope, diff --git a/lldb/source/Symbol/SymbolFileOnDemand.cpp b/lldb/source/Symbol/SymbolFileOnDemand.cpp index b4c9ed002a8ea..737cb1042ca76 100644 --- a/lldb/source/Symbol/SymbolFileOnDemand.cpp +++ b/lldb/source/Symbol/SymbolFileOnDemand.cpp @@ -274,6 +274,15 @@ SymbolFileOnDemand::ResolveSymbolContext(const Address &so_addr, return m_sym_file_impl->ResolveSymbolContext(so_addr, resolve_scope, sc); } +Status SymbolFileOnDemand::CalculateFrameVariableError(StackFrame &frame) { + if (!m_debug_info_enabled) { + LLDB_LOG(GetLog(), "[{0}] {1} is skipped", GetSymbolFileName(), + __FUNCTION__); + return Status(); + } + return m_sym_file_impl->CalculateFrameVariableError(frame); +} + uint32_t SymbolFileOnDemand::ResolveSymbolContext( const SourceLocationSpec &src_location_spec, SymbolContextItem resolve_scope, SymbolContextList &sc_list) { diff --git a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py b/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py index 9b9195561606b..a6a7e159169b6 100644 --- a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py +++ b/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py @@ -85,6 +85,40 @@ def verify_variables(self, verify_dict, variables, 
varref_dict=None): 'variable "%s" in verify dictionary' % (name)) self.verify_values(verify_dict[name], variable, varref_dict) + def darwin_dwarf_missing_obj(self, initCommands): + self.build(debug_info="dwarf") + program = self.getBuildArtifact("a.out") + main_obj = self.getBuildArtifact("main.o") + self.assertTrue(os.path.exists(main_obj)) + # Delete the main.o file that contains the debug info so we force an + # error when we run to main and try to get variables + os.unlink(main_obj) + + self.create_debug_adaptor() + self.assertTrue(os.path.exists(program), 'executable must exist') + + self.launch(program=program, + initCommands=initCommands) + + functions = ['main'] + breakpoint_ids = self.set_function_breakpoints(functions) + self.assertEquals(len(breakpoint_ids), len(functions), "expect one breakpoint") + self.continue_to_breakpoints(breakpoint_ids) + + locals = self.vscode.get_local_variables() + + verify_locals = { + '': { + 'equals': {'type': 'const char *'}, + 'contains': { 'value': [ + 'debug map object file ', + 'main.o" containing debug info does not exist, debug info will not be loaded'] + } + }, + } + varref_dict = {} + self.verify_variables(verify_locals, locals, varref_dict) + @skipIfWindows @skipIfRemote def test_scopes_variables_setVariable_evaluate(self): @@ -529,33 +563,16 @@ def test_darwin_dwarf_missing_obj(self): changing compiler options and are designed to give better feedback to the user. 
''' - self.build(debug_info="dwarf") - program = self.getBuildArtifact("a.out") - main_obj = self.getBuildArtifact("main.o") - self.assertTrue(os.path.exists(main_obj)) - # Delete the main.o file that contains the debug info so we force an - # error when we run to main and try to get variables - os.unlink(main_obj) - - self.create_debug_adaptor() - self.assertTrue(os.path.exists(program), 'executable must exist') - self.launch(program) - - functions = ['main'] - breakpoint_ids = self.set_function_breakpoints(functions) - self.assertEquals(len(breakpoint_ids), len(functions), "expect one breakpoint") - self.continue_to_breakpoints(breakpoint_ids) + self.darwin_dwarf_missing_obj(None) - locals = self.vscode.get_local_variables() - verify_locals = { - '': { - 'equals': {'type': 'const char *'}, - 'contains': { 'value': [ - 'debug map object file ', - 'main.o" containing debug info does not exist, debug info will not be loaded'] - } - }, - } - varref_dict = {} - self.verify_variables(verify_locals, locals, varref_dict) + @no_debug_info_test + @skipUnlessDarwin + def test_darwin_dwarf_missing_obj_with_symbol_ondemand_enabled(self): + ''' + Test that if we build a binary with DWARF in .o files and we remove + the .o file for main.cpp, that we get a variable named "" + whose value matches the appropriate error. Test with symbol_ondemand_enabled. 
+ ''' + initCommands = ['settings set symbols.load-on-demand true'] + self.darwin_dwarf_missing_obj(initCommands) From c9a959334707810795f7e8c56ab6dd55ff0a359d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 3 Nov 2022 15:12:50 -0700 Subject: [PATCH 194/516] clang/cmake: Simplify lit detection for standalone builds Reviewed By: mgorny, phosek, Ericson2314 Differential Revision: https://reviews.llvm.org/D137224 --- clang/CMakeLists.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 236e6fbaca280..8763cc0c1caa8 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -93,9 +93,14 @@ if(CLANG_BUILT_STANDALONE) set(LLVM_UTILS_PROVIDED ON) endif() + # Seek installed Lit. + find_program(LLVM_LIT + NAMES llvm-lit lit.py lit + PATHS "${LLVM_MAIN_SRC_DIR}/utils/lit" + DOC "Path to lit.py") + if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) # Note: path not really used, except for checking if lit was found - set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/llvm-lit) add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit utils/llvm-lit) endif() @@ -112,12 +117,6 @@ if(CLANG_BUILT_STANDALONE) AND EXISTS ${UNITTEST_DIR}/CMakeLists.txt) add_subdirectory(${UNITTEST_DIR} utils/unittest) endif() - else() - # Seek installed Lit. - find_program(LLVM_LIT - NAMES llvm-lit lit.py lit - PATHS "${LLVM_MAIN_SRC_DIR}/utils/lit" - DOC "Path to lit.py") endif() if(LLVM_LIT) From 348189880b7bc9513ccdd0ea3a1e255b4d3190e7 Mon Sep 17 00:00:00 2001 From: Peter Rong Date: Mon, 24 Oct 2022 19:10:47 -0700 Subject: [PATCH 195/516] [IR Verifier] didn't check if switch case is constant, align IR Verifier's check with LLParser. If a programmer incorrectly `Switch->setOperand()` and `Verifier` will pass, causing problems when dumping this `Module` This patch aligns SwitchInst's check with LLParser. It also includes a test to justify the patch. 
Differential Revision: https://reviews.llvm.org/D136656 --- llvm/lib/IR/Verifier.cpp | 2 ++ llvm/unittests/IR/VerifierTest.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 0614f206981a1..e7c2eb9e8818b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2879,6 +2879,8 @@ void Verifier::visitSwitchInst(SwitchInst &SI) { Type *SwitchTy = SI.getCondition()->getType(); SmallPtrSet Constants; for (auto &Case : SI.cases()) { + Check(isa(SI.getOperand(Case.getCaseIndex() * 2 + 2)), + "Case value is not a constant integer.", &SI); Check(Case.getCaseValue()->getType() == SwitchTy, "Switch constants must all be same type as switch value!", &SI); Check(Constants.insert(Case.getCaseValue()).second, diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 5f34463a345eb..c4977a9ecd55a 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -270,5 +270,35 @@ TEST(VerifierTest, AttributesWrongContext) { EXPECT_TRUE(verifyFunction(*F2)); } +TEST(VerifierTest, SwitchInst) { + LLVMContext C; + Module M("M", C); + IntegerType *Int32Ty = Type::getInt32Ty(C); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), {Int32Ty, Int32Ty}, + /*isVarArg=*/false); + Function *F = Function::Create(FTy, Function::ExternalLinkage, "foo", M); + BasicBlock *Entry = BasicBlock::Create(C, "entry", F); + BasicBlock *Default = BasicBlock::Create(C, "default", F); + BasicBlock *OnOne = BasicBlock::Create(C, "on_one", F); + BasicBlock *OnTwo = BasicBlock::Create(C, "on_two", F); + + BasicBlock *Exit = BasicBlock::Create(C, "exit", F); + + BranchInst::Create(Exit, Default); + BranchInst::Create(Exit, OnTwo); + BranchInst::Create(Exit, OnOne); + ReturnInst::Create(C, Exit); + + Value *Cond = F->getArg(0); + SwitchInst *Switch = SwitchInst::Create(Cond, Default, 2, Entry); + Switch->addCase(ConstantInt::get(Int32Ty, 1), 
OnOne); + Switch->addCase(ConstantInt::get(Int32Ty, 2), OnTwo); + + EXPECT_FALSE(verifyFunction(*F)); + // set one case value to function argument. + Switch->setOperand(2, F->getArg(1)); + EXPECT_TRUE(verifyFunction(*F)); +} + } // end anonymous namespace } // end namespace llvm From ef0d689e8be2ac22b2e2da477217c761354c0ff9 Mon Sep 17 00:00:00 2001 From: Henry Yu Date: Thu, 3 Nov 2022 15:30:38 -0700 Subject: [PATCH 196/516] [SelectionDAGBuilder] use bitcast instead of AnyExtOrTrunc if copy parts from an int vector to a float vector to fix issue #58615 The getCopyFromPartsVector doesn't work correctly when PartEVT and ValueVT have both different element type and different size. This patch 1) removes the part of a comment that contains the incorrect assumption that element type are the same 2) use bitcast when copy parts of int vector to a float vector after the subvector extraction Reviewed By: Peter, efriedma Differential Revision: https://reviews.llvm.org/D136726 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 9 +++++---- llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll | 11 +++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 331149bdd05f5..a63e85a2863e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -398,10 +398,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // If the element type of the source/dest vectors are the same, but the - // parts vector has more elements than the value vector, then we have a - // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the - // elements we want. 
+ // If the parts vector has more elements than the value vector, then we + // have a vector widening case (e.g. <2 x float> -> <4 x float>). + // Extract the elements we want. if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) { assert((PartEVT.getVectorElementCount().getKnownMinValue() > ValueVT.getVectorElementCount().getKnownMinValue()) && @@ -415,6 +414,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, DAG.getVectorIdxConstant(0, DL)); if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } // Promoted vector extract diff --git a/llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll b/llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll new file mode 100644 index 0000000000000..1677a7b5d013d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s + +define <1 x float> @f(<16 x i64> %0, <1 x float> %1) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: // %BB +; CHECK-NEXT: ldr d0, [sp] +; CHECK-NEXT: ret +BB: + ret <1 x float> %1 +} From 0589038a6ff12be452be2a2011ac23d1c0d8e7dc Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 3 Nov 2022 15:42:51 -0700 Subject: [PATCH 197/516] [StatepointLowering] remove unused parameter. 
NFC Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D136885 --- llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 11dd2e3dcd0f5..0b760ac652d2e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -321,7 +321,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, /// reference lowered call result static std::pair lowerCallFromStatepointLoweringInfo( SelectionDAGBuilder::StatepointLoweringInfo &SI, - SelectionDAGBuilder &Builder, SmallVectorImpl &PendingExports) { + SelectionDAGBuilder &Builder) { SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerInvokable(SI.CLI, SI.EHPadBB); @@ -770,8 +770,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // Get call node, we will replace it later with statepoint SDValue ReturnVal; SDNode *CallNode; - std::tie(ReturnVal, CallNode) = - lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports); + std::tie(ReturnVal, CallNode) = lowerCallFromStatepointLoweringInfo(SI, *this); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. From 3310fe55d9480ef3c27037043a5c3db8c7003914 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Wed, 2 Nov 2022 05:18:26 +0000 Subject: [PATCH 198/516] [mlir][linalg] Add reduction tiling transformation Add a transformation to tile reduction ops into a parallel operation followed by a merge operation. This is equivalent to the existing reduction splitting transformation but using loops instead of using higher dimensions linalg. 
Differential Revision: https://reviews.llvm.org/D136586 --- .../Linalg/TransformOps/LinalgTransformOps.td | 88 ++++++++++ .../include/mlir/Dialect/Linalg/Utils/Utils.h | 4 + .../SCF/Transforms/TileUsingInterface.h | 40 +++++ .../mlir/Interfaces/TilingInterface.td | 68 ++++++++ .../TransformOps/LinalgTransformOps.cpp | 27 +++ .../Linalg/Transforms/SplitReduction.cpp | 49 ++---- .../Linalg/Transforms/TilingInterfaceImpl.cpp | 160 ++++++++++++++++++ mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 41 ++++- .../SCF/Transforms/TileUsingInterface.cpp | 84 +++++++++ .../Linalg/transform-tile-reduction.mlir | 88 ++++++++++ 10 files changed, 606 insertions(+), 43 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/transform-tile-reduction.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 5c304f5efb6ea..6cb14acb1b089 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -608,6 +608,94 @@ def SplitReductionOp : Op { + let description = [{ + Indicates that the given `target` op should be transformed with the + `tileReduction` transformation with the tile size provided as attribute. + + This transformation tiles the `target` along the reduction dimensions. It + creates a tensor initialized with the identity value. Then it creates nested + loops with a parallel version of `target` op inside. The parallel op + dimensions are less or equal to the tile size passed by user. + After the loop a merge operation is created to do a final reduction with the + partial reductions. + The initial tensor always uses the tile size dimension. This may overallocate + if the tile size is greater than the reduction dimension. 
+ + #### Return modes + + This 3 returned handles point to: + - the fill op used to initialize the neutral element, + - the parallel tiled op and + - the result-combining op. + + #### Example: + + ``` + %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0 : tensor) + outs(%out : tensor) { + ^bb0(%arg7: f32, %arg9: f32): + %1 = arith.addf %arg7, %arg9 : f32 + linalg.yield %1 : f32 + } -> tensor + return %red : tensor + ``` + + is transformed into: + + ``` + %0 = tensor.empty(%dim_1) : tensor + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor) { + %extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1] : tensor to tensor + %extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1] : tensor to tensor + %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"]} + ins(%extracted_slice_2 : tensor) + outs(%extracted_slice : tensor) { + ^bb0(%in: f32, %out: f32): + %5 = arith.addf %in, %out : f32 + linalg.yield %5 : f32 + } -> tensor + %dim_3 = tensor.dim %1, %c0 : tensor + %inserted_slice = tensor.insert_slice %4 into %arg3[0, 0] [%dim_3, 5] [1, 1] : tensor into tensor + scf.yield %inserted_slice : tensor + } + %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%2 : tensor) + outs(%arg1 : tensor) { + ^bb0(%in: f32, %out: f32): + %4 = arith.addf %in, %out : f32 + linalg.yield %4 : f32 + } -> tensor + ``` + }]; + + let arguments = (ins PDL_Operation:$target, + DefaultValuedAttr:$tile_sizes); + let results = (outs PDL_Operation:$fill_op, + PDL_Operation:$split_linalg_op, + PDL_Operation:$combining_linalg_op); + + let assemblyFormat = "$target 
attr-dict"; + + let extraClassDeclaration = [{ + ::mlir::DiagnosedSilenceableFailure applyToOne( + ::mlir::linalg::LinalgOp target, + ::llvm::SmallVectorImpl<::mlir::Operation *> &results, + ::mlir::transform::TransformState &state); + }]; +} + def TileOp : Op, DeclareOpInterfaceMethods]> { diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 6a10d4332e7eb..5fc7938e0dd2f 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -137,6 +137,10 @@ GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to); Optional> getReassociationMapForFoldingUnitDims(ArrayRef mixedSizes); +/// Return the identity numeric value associated with the given op. Return +/// llvm::None if there is no known neutral element. +Optional getNeutralElement(Operation *op); + //===----------------------------------------------------------------------===// // Fusion / Tiling utilities //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h index 6cdef2512f607..9fa4114c77b11 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -136,6 +136,46 @@ tileConsumerAndFuseProducerGreedilyUsingSCFForOp( FailureOr> lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op); +/// Transformation information returned after reduction tiling. +struct SCFReductionTilingResult { + /// The partial reduction tiled op generated. + Operation *parallelTiledOp; + /// The final reduction operation merging all the partial reductions. + Operation *mergeOp; + /// Initial op + Operation *initialOp; + /// The `scf.for` operations that iterate over the tiles. 
+ SmallVector loops; +}; + +/// Method to tile a reduction and generate a parallel op within a serial loop. +/// Each of the partial reductions is calculated in parallel. Then after the +/// loop all the partial reductions are merged into a final reduction. +/// For example for the following sequence +/// +/// ```mlir +/// %0 = linalg.generic %in ["parallel", "reduction"] +/// : tensor<7x9xf32> -> tensor<7xf32> +/// ``` +/// +/// into: +/// +/// ```mlir +/// %0 = linalg.fill ... : tensor<7x4xf32> +/// %1 = scf.for ... iter_args(%arg0 = %0) +/// %2 = tensor.extract_slice %arg0 : tensor<7x4xf32> -> tensor<7x?xf32> +/// %3 = tensor.extract_slice %in : tensor<7x9xf32> -> tensor<7x?xf32> +/// %4 = linalg.generic %2, %3 ["parallel", "parallel"] +/// : tensor<7x?xf32> -> tensor<7x?xf32> +/// %5 = tensor.insert_slice %3, %0[0, 0] : tensor<7x4xf32> +/// } +/// %6 = linalg.generic %1 ["parallel", "reduction"] +/// : tensor<7x4xf32> -> tensor<7xf32> +/// ``` +FailureOr +tileReductionUsingScf(PatternRewriter &b, PartialReductionOpInterface op, + ArrayRef tileSize); + } // namespace scf } // namespace mlir diff --git a/mlir/include/mlir/Interfaces/TilingInterface.td b/mlir/include/mlir/Interfaces/TilingInterface.td index 0cdf7a8eb649a..dc6ffcbb7accc 100644 --- a/mlir/include/mlir/Interfaces/TilingInterface.td +++ b/mlir/include/mlir/Interfaces/TilingInterface.td @@ -155,4 +155,72 @@ def TilingInterface : OpInterface<"TilingInterface"> { > ]; } + +def PartialReductionOpInterface : OpInterface<"PartialReductionOpInterface"> { + let description = [{ + Interface for allowing operations to expose information needed to + tile reductions using partial reduction followed by merge. This is + complementary to TilingInterface to tile reductions. + }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Method to generate a tensor initialized with the identity value of the + operation reduction. 
The tensor shape is equal to operation result + shape with new dimension for each non zero tile size. + }], + /*retType=*/"FailureOr", + /*methodName=*/"generateInitialTensorForPartialReduction", + /*args=*/(ins + "OpBuilder &":$b, + "Location ":$loc, + "ArrayRef":$sizes, + "ArrayRef":$reductionDim), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return failure(); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Method to generate a tiled version of the operation where the tiled + reduction dimension are converted to parallel dimensions with a size + less or equal to the tile size. This is meant to be used with + `mergeReductions` method which will combine the partial reductions. + }], + /*retType=*/"Operation*", + /*methodName=*/"tileToPartialReduction", + /*args=*/(ins + "OpBuilder &":$b, + "Location ":$loc, + "ValueRange":$init, + "ArrayRef":$offsets, + "ArrayRef":$sizes, + "ArrayRef":$reductionDims), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return nullptr; + }] + >, + InterfaceMethod< + /*desc=*/[{ + Method to merge partial reductions for an operation that has been + tiled along the reduction dimensions. This will only apply the + reduction the operation. 
+ }], + /*retType=*/"Operation*", + /*methodName=*/"mergeReductions", + /*args=*/(ins + "OpBuilder &":$b, + "Location ":$loc, + "ValueRange":$partialReduce, + "ArrayRef":$reductionDim), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return nullptr; + }] + > + ]; +} #endif // MLIR_TILINGINTERFACE diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 513882ec91260..c8a3cb6946e3d 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -1094,6 +1094,33 @@ transform::SplitReductionOp::applyToOne(linalg::LinalgOp target, return DiagnosedSilenceableFailure(success()); } +//===----------------------------------------------------------------------===// +// TileReductionUsingScfOp +//===----------------------------------------------------------------------===// + +DiagnosedSilenceableFailure transform::TileReductionUsingScfOp::applyToOne( + linalg::LinalgOp target, SmallVectorImpl &results, + transform::TransformState &state) { + SimpleRewriter rewriter(getContext()); + rewriter.setInsertionPoint(target); + SmallVector tileSizes = extractFromI64ArrayAttr(getTileSizes()); + SmallVector sizes; + for (int64_t size : tileSizes) { + sizes.push_back(rewriter.getIndexAttr(size)); + } + + FailureOr result = scf::tileReductionUsingScf( + rewriter, cast(target.getOperation()), + sizes); + + if (failed(result)) + return DiagnosedSilenceableFailure(reportUnknownTransformError(target)); + results.push_back(result->initialOp); + results.push_back(result->parallelTiledOp); + results.push_back(result->mergeOp); + return DiagnosedSilenceableFailure(success()); +} + //===----------------------------------------------------------------------===// // TileOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp 
b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp index 32d05c5acbe6c..0608c361e774b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp @@ -26,38 +26,6 @@ using namespace mlir; using namespace mlir::linalg; -/// Return the identity numeric value associated to the give op. -static Attribute getNeutralElement(Operation *op) { - // Builder only used as helper for attribute creation. - OpBuilder b(op->getContext()); - Type resultType = op->getResult(0).getType(); - if (auto floatType = resultType.dyn_cast()) { - const llvm::fltSemantics &semantic = floatType.getFloatSemantics(); - if (isa(op)) - return b.getFloatAttr(resultType, llvm::APFloat::getZero(semantic)); - if (isa(op)) - return b.getFloatAttr(resultType, llvm::APFloat(semantic, 1)); - if (isa(op)) - return b.getFloatAttr(resultType, - llvm::APFloat::getLargest(semantic, true)); - if (isa(op)) - return b.getFloatAttr(resultType, - llvm::APFloat::getLargest(semantic, true)); - return Attribute(); - } - if (isa(op)) - return b.getIntegerAttr(resultType, 0); - if (isa(op)) - return b.getIntegerAttr(resultType, -1); - if (isa(op)) - return b.getIntegerAttr(resultType, std::numeric_limits::min()); - if (isa(op)) - return b.getIntegerAttr(resultType, std::numeric_limits::max()); - if (isa(op)) - return b.getIntegerAttr(resultType, 1); - return Attribute(); -} - FailureOr mlir::linalg::splitReduction( PatternRewriter &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc) { @@ -88,8 +56,8 @@ FailureOr mlir::linalg::splitReduction( return b.notifyMatchFailure(op, "Cannot match the reduction pattern"); Operation *reductionOp = combinerOps[0]; - Attribute identity = getNeutralElement(reductionOp); - if (!identity) + Optional identity = getNeutralElement(reductionOp); + if (!identity.has_value()) return b.notifyMatchFailure(op, "Unknown identity value for the reduction"); Location loc = op->getLoc(); @@ -187,7 
+155,7 @@ FailureOr mlir::linalg::splitReduction( emptyOrAllocTensor = b.create( loc, newOutputShape, op.getRegionOutputArgs()[0].getType()); } - Value constantOp = b.create(loc, identity); + Value constantOp = b.create(loc, *identity); Value identityTensor = b.create(op->getLoc(), constantOp, emptyOrAllocTensor) .getResult(0); @@ -309,10 +277,13 @@ FailureOr mlir::linalg::splitReductionByScaling( if (!matchReduction(op.getRegionOutputArgs(), 0, combinerOps)) return b.notifyMatchFailure(op, "cannot match a reduction pattern"); - SmallVector neutralElements = llvm::to_vector<4>( - llvm::map_range(combinerOps, [&](Operation *reductionOp) { - return getNeutralElement(reductionOp); - })); + SmallVector neutralElements; + for (Operation *reductionOp : combinerOps) { + Optional neutralElement = getNeutralElement(reductionOp); + if (!neutralElement.has_value()) + return b.notifyMatchFailure(op, "cannot find neutral element."); + neutralElements.push_back(*neutralElement); + } if (!llvm::all_of(neutralElements, [](Attribute attr) { return attr; })) return b.notifyMatchFailure(op, "unknown reduction neutral"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index c843f0f400793..d1fcc01ca853d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h" +#include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" @@ -240,11 +241,170 @@ struct LinalgOpTilingInterface } }; +//===----------------------------------------------------------------------===// +// External Model for implementing `PartialReductionInterface` for `LinalgOp`s. 
+//===----------------------------------------------------------------------===// + +/// External model implementation of PartialReductionInterface for LinalgOps. +template +struct LinalgOpPartialReductionInterface + : public PartialReductionOpInterface::ExternalModel< + LinalgOpPartialReductionInterface, LinalgOpTy> { + FailureOr generateInitialTensorForPartialReduction( + Operation *op, OpBuilder &b, Location loc, ArrayRef sizes, + ArrayRef reductionDims) const { + auto linalgOp = cast(op); + OpBuilder::InsertionGuard guard(b); + assert(reductionDims.size() == 1 && + "only support single reduction right now."); + if (linalgOp.hasBufferSemantics()) + return op->emitOpError("expected operation to have tensor semantics"); + // Insert the new parallel dimension based on the index of the reduction + // loop. This could be controlled by user for more flexibility. + int64_t insertSplitDimension = reductionDims[0]; + + SmallVector combinerOps; + if (!matchReduction(linalgOp.getRegionOutputArgs(), 0, combinerOps) || + combinerOps.size() != 1) + return op->emitOpError("Failed to anaysis the reduction operation."); + + Operation *reductionOp = combinerOps[0]; + Optional identity = getNeutralElement(reductionOp); + if (!identity.has_value()) + return op->emitOpError( + "Failed to get an identity value for the reduction operation."); + + // Calculate the new shape, we insert the new dimension based on the index + // of the reduction dimension. + SmallVector newOutputShape; + ArrayRef oldShape = + linalgOp.getShape(linalgOp.getDpsInitOperand(0)); + SmallVector dynamicDims; + for (int64_t idx : llvm::seq(0, oldShape.size() + 1)) { + if (idx == insertSplitDimension) { + dispatchIndexOpFoldResults(sizes[idx], dynamicDims, newOutputShape, + ShapedType::kDynamicStrideOrOffset); + continue; + } + int64_t oldIdx = idx < insertSplitDimension ? 
idx : idx - 1; + int64_t dim = oldShape[oldIdx]; + newOutputShape.push_back(dim); + if (ShapedType::isDynamic(dim)) + dynamicDims.push_back(b.createOrFold( + loc, linalgOp.getDpsInitOperand(0)->get(), oldIdx)); + } + Value emptyTensor = b.create( + loc, newOutputShape, linalgOp.getRegionOutputArgs()[0].getType(), + dynamicDims); + Value constantOp = b.create(loc, *identity); + auto identityTensor = + b.create(loc, constantOp, emptyTensor); + return identityTensor.getOperation(); + } + + Operation *tileToPartialReduction(Operation *op, OpBuilder &b, Location loc, + ValueRange init, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef reductionDims) const { + OpBuilder::InsertionGuard guard(b); + auto linalgOp = cast(op); + assert(reductionDims.size() == 1 && + "only support single reduction right now."); + int64_t insertSplitDimension = reductionDims[0]; + + AffineMap oldOutputMap = + linalgOp.getMatchingIndexingMap(linalgOp.getDpsInitOperand(0)); + SmallVector outputExpr; + for (auto &[idx, expr] : llvm::enumerate(oldOutputMap.getResults())) { + if (static_cast(idx) == insertSplitDimension) { + outputExpr.push_back(b.getAffineDimExpr(reductionDims[0])); + } + outputExpr.push_back(expr); + } + if (insertSplitDimension == oldOutputMap.getNumResults()) + outputExpr.push_back(b.getAffineDimExpr(reductionDims[0])); + + // Step 1: Extract a slice of the input operands. + SmallVector valuesToTile = linalgOp.getDpsInputOperands(); + SmallVector tiledOperands = + makeTiledShapes(b, loc, op, valuesToTile, offsets, sizes, {}, true); + + // Step 2: Extract the accumulator operands + SmallVector strides(offsets.size(), b.getIndexAttr(1)); + SmallVector outOffsets(offsets.size(), b.getIndexAttr(0)); + // TODO: use SubsetExtractOpInterface once it is available. + Value out = b.create(loc, init[0], outOffsets, + sizes, strides); + + // Step3. create a generic op where the reduction dimension is replaced by a + // parallel dimension of the size of reduction. 
+ SmallVector newIteratorTypes = linalgOp.getIteratorTypesArray(); + newIteratorTypes[reductionDims[0]] = getParallelIteratorTypeName(); + SmallVector newMaps = linalgOp.getIndexingMapsArray(); + newMaps.back() = AffineMap::get(newMaps.back().getNumDims(), 0, outputExpr, + linalgOp.getContext()); + auto genericOp = + b.create(loc, TypeRange({out.getType()}), tiledOperands, + ValueRange({out}), newMaps, newIteratorTypes); + BlockAndValueMapping mapping; + op->getRegion(0).cloneInto(&genericOp.getRegion(), + genericOp.getRegion().begin(), mapping); + return genericOp.getOperation(); + } + + Operation *mergeReductions(Operation *op, OpBuilder &b, Location loc, + ValueRange partialReduce, + ArrayRef reductionDims) const { + auto linalgOp = cast(op); + assert(reductionDims.size() == 1 && + "only support single reduction right now."); + int64_t dimToMerge = reductionDims[0]; + + // Then create a new reduction that only reduce the newly added dimension + // from the previous op. + int64_t intermRank = + partialReduce[0].getType().cast().getRank(); + AffineMap inputMap = b.getMultiDimIdentityMap(intermRank); + SmallVector reductionIteratorTypes; + SmallVector exprs; + for (int64_t i : llvm::seq(0, intermRank)) { + if (dimToMerge == i) { + reductionIteratorTypes.push_back(getReductionIteratorTypeName()); + } else { + exprs.push_back(b.getAffineDimExpr(i)); + reductionIteratorTypes.push_back(getParallelIteratorTypeName()); + } + } + AffineMap outputMap = + AffineMap::get(intermRank, 0, exprs, op->getContext()); + SmallVector reductionMaps = {inputMap, outputMap}; + + SmallVector combinerOps; + matchReduction(linalgOp.getRegionOutputArgs(), 0, combinerOps); + Operation *reductionOp = combinerOps[0]; + + auto reduction = b.create( + loc, op->getResultTypes(), ValueRange({partialReduce[0]}), + SmallVector{linalgOp.getDpsInitOperands()}, reductionMaps, + reductionIteratorTypes, + [reductionOp](OpBuilder &b, Location loc, ValueRange inputs) { + Operation *clonedReductionOp = 
b.clone(*reductionOp); + clonedReductionOp->setOperand(0, inputs[0]); + clonedReductionOp->setOperand(1, inputs[1]); + b.create(loc, clonedReductionOp->getResult(0)); + }); + return reduction.getOperation(); + } +}; + } // namespace template static void registerOne(MLIRContext *ctx) { OpType::template attachInterface>(*ctx); + OpType::template attachInterface>( + *ctx); } /// Variadic helper function. diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index ce15c6767b24b..04cbed0c4e135 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -948,13 +948,14 @@ computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, SmallVector subShapeSizes = computeTileSizes(builder, loc, tileSizes, sizeBounds); - assert(static_cast(valuesToTile.size()) == + assert(static_cast(valuesToTile.size()) <= linalgOp->getNumOperands() && - "expected one value to tile for every operand"); + "more value to tile than operands."); SmallVector> allSliceParams; allSliceParams.reserve(valuesToTile.size()); - for (OpOperand &opOperand : linalgOp->getOpOperands()) { - Value shapedOp = valuesToTile[opOperand.getOperandNumber()]; + for (auto [opOperand, val] : + llvm::zip(linalgOp->getOpOperands(), valuesToTile)) { + Value shapedOp = val; LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp); AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand); // Use `opOperand` as is if it is not tiled and not an output tensor. Having @@ -1059,5 +1060,37 @@ getReassociationMapForFoldingUnitDims(ArrayRef mixedSizes) { return reassociation; } +/// Return the identity numeric value associated to the give op. +Optional getNeutralElement(Operation *op) { + // Builder only used as helper for attribute creation. 
+ OpBuilder b(op->getContext()); + Type resultType = op->getResult(0).getType(); + if (auto floatType = resultType.dyn_cast()) { + const llvm::fltSemantics &semantic = floatType.getFloatSemantics(); + if (isa(op)) + return b.getFloatAttr(resultType, llvm::APFloat::getZero(semantic)); + if (isa(op)) + return b.getFloatAttr(resultType, llvm::APFloat(semantic, 1)); + if (isa(op)) + return b.getFloatAttr(resultType, + llvm::APFloat::getLargest(semantic, true)); + if (isa(op)) + return b.getFloatAttr(resultType, + llvm::APFloat::getLargest(semantic, true)); + return Attribute(); + } + if (isa(op)) + return b.getIntegerAttr(resultType, 0); + if (isa(op)) + return b.getIntegerAttr(resultType, -1); + if (isa(op)) + return b.getIntegerAttr(resultType, std::numeric_limits::min()); + if (isa(op)) + return b.getIntegerAttr(resultType, std::numeric_limits::max()); + if (isa(op)) + return b.getIntegerAttr(resultType, 1); + return llvm::None; +} + } // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index 2d6edb7332ac8..0c86bd4d1262a 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -424,6 +424,90 @@ mlir::scf::tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, return tilingResult; } +FailureOr +mlir::scf::tileReductionUsingScf(PatternRewriter &b, + PartialReductionOpInterface op, + ArrayRef tileSize) { + Location loc = op.getLoc(); + // Ops implementing PartialReductionOpInterface are expected to implement + // TilingInterface. 
+ auto tilingInterfaceOp = cast(op.getOperation()); + SmallVector iterationDomain = tilingInterfaceOp.getIterationDomain(b); + SmallVector tileSizeVector = + getValueOrCreateConstantIndexOp(b, loc, tileSize); + if (tileSizeVector.size() < iterationDomain.size()) { + auto zero = b.create(loc, 0); + tileSizeVector.append(iterationDomain.size() - tileSizeVector.size(), zero); + } + if (op->getNumResults() != 1) + return b.notifyMatchFailure( + op, "don't support ops with multiple results for now"); + SmallVector iterators = + tilingInterfaceOp.getLoopIteratorTypes(); + int64_t numReductionDims = llvm::count( + tilingInterfaceOp.getLoopIteratorTypes(), utils::IteratorType::reduction); + if (numReductionDims != 1) + return b.notifyMatchFailure( + op, "only support ops with one reduction dimension."); + int reductionDim; + for (auto &[idx, iteratorType] : + llvm::enumerate(tilingInterfaceOp.getLoopIteratorTypes())) { + if (iteratorType == utils::IteratorType::reduction) { + reductionDim = idx; + break; + } + } + // 1. create the inital tensor value. + FailureOr identityTensor = + op.generateInitialTensorForPartialReduction(b, loc, tileSize, + reductionDim); + if (failed(identityTensor)) + return b.notifyMatchFailure(op, + "cannot create a tensor of identity value."); + // 2. Create the nested loops. + SmallVector offsets, sizes; + SmallVector loops = generateTileLoopNest( + b, loc, iterationDomain, tileSizeVector, offsets, sizes); + + // 3. Generate the tiled implementation within the inner most loop. 
+ b.setInsertionPoint(loops.back().getBody()->getTerminator()); + Operation *parallelOp = + op.tileToPartialReduction(b, loc, identityTensor.value()->getResults(), + offsets, sizes, reductionDim); + + SmallVector resultSizesList; + for (size_t i = 0; i < offsets.size(); i++) + resultSizesList.push_back( + b.createOrFold(loc, parallelOp->getResult(0), i)); + SmallVector outOffsets(offsets.size(), b.getIndexAttr(0)); + FailureOr> replacementOr = yieldTiledValues( + b, identityTensor.value()->getResults(), parallelOp->getResults(), + outOffsets, resultSizesList, loops); + if (failed(replacementOr)) + return b.notifyMatchFailure(op, "failed to yield replacement"); + + auto dstOp = cast(parallelOp); + auto innerMostLoop = loops.back(); + SmallVector destinationTensors = dstOp.getDpsInitOperands(); + assert(destinationTensors.size() == + innerMostLoop.getRegionIterArgs().size() && + "unexpected number of outputs"); + updateDestinationOperandsForTiledOp(b, destinationTensors, + innerMostLoop.getRegionIterArgs()); + + // 4. Apply the merge reduction to combine all the partial values. + b.setInsertionPointAfter(*loops.begin()); + Operation *mergeOp = + op.mergeReductions(b, loc, replacementOr.value(), reductionDim); + b.replaceOp(op, mergeOp->getResults()); + + SCFReductionTilingResult results; + results.initialOp = identityTensor.value(); + results.loops = std::move(loops); + results.parallelTiledOp = parallelOp; + results.mergeOp = mergeOp; + return results; +} //===----------------------------------------------------------------------===// // tileConsumerAndFuseProducerGreedilyUsingSCFForOp implementation. 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir new file mode 100644 index 0000000000000..dad2f8476d1ff --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir @@ -0,0 +1,88 @@ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file -canonicalize | FileCheck %s + +func.func @reduction_tile(%arg0: tensor, %out: tensor) -> tensor { + %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0 : tensor) + outs(%out : tensor) { + ^bb0(%arg7: f32, %arg9: f32): + %1 = arith.mulf %arg7, %arg7 : f32 + %2 = arith.addf %1, %arg9 : f32 + linalg.yield %2 : f32 + } -> tensor + return %red : tensor +} + +transform.sequence failures(propagate) { +^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [0, 5] } +} + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d0)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 5)> +// CHECK: func @reduction_tile(%[[ARG0:.+]]: tensor, %[[ARG1:.+]]: tensor +// CHECK-DAG: %[[I:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor +// CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor +// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor +// CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[L:.*]] = scf.for %[[K:.*]] = 
%[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor) { +// CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]] +// CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor +// CHECK: %[[EXT:.*]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor +// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) outs(%[[EXT]] : tensor) { +// CHECK: arith.mulf +// CHECK: arith.addf +// CHECK: linalg.yield +// CHECK: } -> tensor +// CHECK: %[[D3:.*]] = tensor.dim %[[PR]], %[[C0]] : tensor +// CHECK: %[[D4:.*]] = tensor.dim %[[PR]], %[[C1]] : tensor +// CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor +// CHECK: scf.yield %[[INS]] : tensor +// CHECK: } +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: arith.addf +// CHECK: linalg.yield +// CHECK: } -> tensor +// CHECK: return %[[R]] : tensor + +// ----- + +func.func @reduction_tile_transpose(%arg0: tensor, %out: tensor) -> tensor { + %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d1)>], + iterator_types = ["reduction", "parallel"]} + ins(%arg0 : tensor) + outs(%out : tensor) { + ^bb0(%arg7: f32, %arg9: f32): + %42 = arith.addf %arg7, %arg9 : f32 + linalg.yield %42 : f32 + } -> tensor + return %red : tensor +} + +transform.sequence failures(propagate) { +^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [5, 0] } +} + +// CHECK: func @reduction_tile_transpose +// CHECK: tensor.empty(%{{.*}}) : tensor<5x?xf32> +// CHECK: linalg.fill {{.*}} : 
tensor<5x?xf32>) -> tensor<5x?xf32> +// CHECK: scf.for +// CHECK: linalg.generic +// CHECK: %[[D3:.*]] = tensor.dim %{{.*}}, %[[C0]] : tensor +// CHECK: %[[D4:.*]] = tensor.dim %{{.*}}, %[[C1]] : tensor +// CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor<5x?xf32> +// CHECK: scf.yield {{.*}} : tensor<5x?xf32> +// CHECK: } +// CHECK: linalg.generic +// CHECK: return From 3d83a57721def7aad227d68b1e5e0afa6a74a33f Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 31 Oct 2022 11:19:36 -0700 Subject: [PATCH 199/516] [lldb] Support simplified template names when looking up functions This makes setting breakpoints work with -gsimple-template-names. Assume that callers handle false positives. For example, `Module::LookupInfo::Prune` removes wrong template instantiations when setting a breakpoint. Reviewed By: labath Differential Revision: https://reviews.llvm.org/D137098 --- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 18 ++++++++++++++ .../cpp/TestCPPBreakpointLocations.py | 24 ++++++++++++++++++- .../functionalities/breakpoint/cpp/main.cpp | 5 ++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 5d5a47bc0c92c..066fc9f434cae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2389,6 +2389,24 @@ void SymbolFileDWARF::FindFunctions(const Module::LookupInfo &lookup_info, ResolveFunction(die, include_inlines, sc_list); return true; }); + // With -gsimple-template-names, a templated type's DW_AT_name will not + // contain the template parameters. Try again stripping '<' and anything + // after, filtering out entries with template parameters that don't match. 
+ { + const llvm::StringRef name_ref = name.GetStringRef(); + auto it = name_ref.find('<'); + if (it != llvm::StringRef::npos) { + const llvm::StringRef name_no_template_params = name_ref.slice(0, it); + + Module::LookupInfo no_tp_lookup_info(lookup_info); + no_tp_lookup_info.SetLookupName(ConstString(name_no_template_params)); + m_index->GetFunctions(no_tp_lookup_info, *this, parent_decl_ctx, [&](DWARFDIE die) { + if (resolved_dies.insert(die.GetDIE()).second) + ResolveFunction(die, include_inlines, sc_list); + return true; + }); + } + } // Return the number of variable that were appended to the list const uint32_t num_matches = sc_list.GetSize() - original_size; diff --git a/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py b/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py index 6c86f5016a606..1dedc5d7f9bbd 100644 --- a/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py +++ b/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py @@ -12,7 +12,16 @@ class TestCPPBreakpointLocations(TestBase): @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24764") def test(self): - self.build() + self.do_test(dict()) + + @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24764") + @skipIf(compiler=no_match("clang")) + @skipIf(compiler_version=["<", "15.0"]) + def test_simple_template_names(self): + self.do_test(dict(CFLAGS_EXTRAS="-gsimple-template-names")) + + def do_test(self, debug_flags): + self.build(dictionary=debug_flags) self.breakpoint_id_tests() def verify_breakpoint_locations(self, target, bp_dict): @@ -57,7 +66,11 @@ def breakpoint_id_tests(self): # Template cases {'name': 'func', 'loc_names': []}, + {'name': 'Foo::func', 'loc_names': []}, + {'name': 'ns::Foo::func', 'loc_names': []}, {'name': 'func', 'loc_names': ['auto ns::Foo::func()']}, + {'name': 'Foo::func', 'loc_names': ['auto ns::Foo::func()']}, + {'name': 'ns::Foo::func', 'loc_names': ['auto 
ns::Foo::func()']}, {'name': 'func', 'loc_names': ['auto ns::Foo::func()', 'auto ns::Foo::func>()']}, @@ -71,6 +84,15 @@ def breakpoint_id_tests(self): {'name': 'operator<<', 'loc_names': ['void ns::Foo::operator<<(int)']}, {'name': 'ns::Foo::operator<<', 'loc_names': ['void ns::Foo::operator<<(int)', 'void ns::Foo::operator<<>(ns::Foo)']}, + + {'name': 'g', 'loc_names': []}, + {'name': 'g', 'loc_names': ['void ns::g()']}, + {'name': 'g', 'loc_names': ['void ns::g()']}, + {'name': 'g', 'loc_names': ['void ns::g()', 'void ns::g()']}, + {'name': 'ns::g', 'loc_names': []}, + {'name': 'ns::g', 'loc_names': ['void ns::g()']}, + {'name': 'ns::g', 'loc_names': ['void ns::g()']}, + {'name': 'ns::g', 'loc_names': ['void ns::g()', 'void ns::g()']}, ] for bp_dict in bp_dicts: diff --git a/lldb/test/API/functionalities/breakpoint/cpp/main.cpp b/lldb/test/API/functionalities/breakpoint/cpp/main.cpp index 7ee61e92ffd57..b2cee995198ad 100644 --- a/lldb/test/API/functionalities/breakpoint/cpp/main.cpp +++ b/lldb/test/API/functionalities/breakpoint/cpp/main.cpp @@ -94,6 +94,8 @@ template struct Foo { template void operator<<(T t) {} }; + +template void g() {} } // namespace ns int main (int argc, char const *argv[]) @@ -123,5 +125,8 @@ int main (int argc, char const *argv[]) f.operator<<(5); f.operator<< >({}); + ns::g(); + ns::g(); + return 0; } From 374e6462a4c970965d3348bc90a2109056b051c9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Nov 2022 15:54:09 -0700 Subject: [PATCH 200/516] [RISCV] Add missing break to the last case in a switch. NFC Might create a problem in the future if more cases are added. 
--- llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index 5e0613f1e2ab4..a31f898e104d7 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -286,6 +286,7 @@ static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI, FixableDef.insert(&MI); return true; } + break; } return false; From 9ef31465114dcd226c27f43b73f221e89a4fa83d Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Thu, 3 Nov 2022 11:51:43 -0700 Subject: [PATCH 201/516] [mlir][index] Add shl, shrs, and shru ops This patch adds the left shift, signed right shift, and unsigned right shift operations to the index dialects with folders and LLVM lowerings. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D137349 --- .../include/mlir/Dialect/Index/IR/IndexOps.td | 63 +++++++++++ .../Conversion/IndexToLLVM/IndexToLLVM.cpp | 8 ++ mlir/lib/Dialect/Index/IR/IndexOps.cpp | 54 ++++++++- .../Conversion/IndexToLLVM/index-to-llvm.mlir | 8 +- .../Dialect/Index/index-canonicalize.mlir | 105 ++++++++++++++++++ mlir/test/Dialect/Index/index-ops.mlir | 6 + 6 files changed, 239 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td index 0896f21954603..29f4c1eb151c5 100644 --- a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td +++ b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td @@ -280,6 +280,69 @@ def Index_MaxUOp : IndexBinaryOp<"maxu"> { }]; } +//===----------------------------------------------------------------------===// +// ShlOp +//===----------------------------------------------------------------------===// + +def Index_ShlOp : IndexBinaryOp<"shl"> { + let summary = "index shift left"; + let description = [{ + The `index.shl` operation shifts an index value to the left by a variable + amount. 
The low order bits are filled with zeroes. The RHS operand is always + treated as unsigned. If the RHS operand is equal to or greater than the + index bitwidth, the operation is undefined. + + Example: + + ```mlir + // c = a << b + %c = index.shl %a, %b + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// ShrSOp +//===----------------------------------------------------------------------===// + +def Index_ShrSOp : IndexBinaryOp<"shrs"> { + let summary = "signed index shift right"; + let description = [{ + The `index.shrs` operation shifts an index value to the right by a variable + amount. The LHS operand is treated as signed. The high order bits are filled + with copies of the most significant bit. If the RHS operand is equal to or + greater than the index bitwidth, the operation is undefined. + + Example: + + ```mlir + // c = a >> b + %c = index.shrs %a, %b + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// ShrUOp +//===----------------------------------------------------------------------===// + +def Index_ShrUOp : IndexBinaryOp<"shru"> { + let summary = "unsigned index shift right"; + let description = [{ + The `index.shru` operation shifts an index value to the right by a variable + amount. The LHS operand is treated as unsigned. The high order bits are + filled with zeroes. If the RHS operand is equal to or greater than the index + bitwidth, the operation is undefined. 
+ + Example: + + ```mlir + // c = a >> b + %c = index.shru %a, %b + ``` + }]; +} + //===----------------------------------------------------------------------===// // CastSOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp b/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp index 844c57a74a198..4461d5121ef01 100644 --- a/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp +++ b/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp @@ -268,6 +268,11 @@ using ConvertIndexMaxS = mlir::OneToOneConvertToLLVMPattern; using ConvertIndexMaxU = mlir::OneToOneConvertToLLVMPattern; +using ConvertIndexShl = mlir::OneToOneConvertToLLVMPattern; +using ConvertIndexShrS = + mlir::OneToOneConvertToLLVMPattern; +using ConvertIndexShrU = + mlir::OneToOneConvertToLLVMPattern; using ConvertIndexBoolConstant = mlir::OneToOneConvertToLLVMPattern; @@ -290,6 +295,9 @@ void index::populateIndexToLLVMConversionPatterns( ConvertIndexRemU, ConvertIndexMaxS, ConvertIndexMaxU, + ConvertIndexShl, + ConvertIndexShrS, + ConvertIndexShrU, ConvertIndexCeilDivS, ConvertIndexCeilDivU, ConvertIndexFloorDivS, diff --git a/mlir/lib/Dialect/Index/IR/IndexOps.cpp b/mlir/lib/Dialect/Index/IR/IndexOps.cpp index fcbb076f2e16f..241fa416eddab 100644 --- a/mlir/lib/Dialect/Index/IR/IndexOps.cpp +++ b/mlir/lib/Dialect/Index/IR/IndexOps.cpp @@ -62,17 +62,19 @@ Operation *IndexDialect::materializeConstant(OpBuilder &b, Attribute value, /// the integer result, which in turn must satisfy the above property. 
static OpFoldResult foldBinaryOpUnchecked( ArrayRef operands, - function_ref calculate) { + function_ref(const APInt &, const APInt &)> calculate) { assert(operands.size() == 2 && "binary operation expected 2 operands"); auto lhs = dyn_cast_if_present(operands[0]); auto rhs = dyn_cast_if_present(operands[1]); if (!lhs || !rhs) return {}; - APInt result = calculate(lhs.getValue(), rhs.getValue()); - assert(result.trunc(32) == + Optional result = calculate(lhs.getValue(), rhs.getValue()); + if (!result) + return {}; + assert(result->trunc(32) == calculate(lhs.getValue().trunc(32), rhs.getValue().trunc(32))); - return IntegerAttr::get(IndexType::get(lhs.getContext()), std::move(result)); + return IntegerAttr::get(IndexType::get(lhs.getContext()), std::move(*result)); } /// Fold an index operation only if the truncated 64-bit result matches the @@ -284,6 +286,50 @@ OpFoldResult MaxUOp::fold(ArrayRef operands) { }); } +//===----------------------------------------------------------------------===// +// ShlOp +//===----------------------------------------------------------------------===// + +OpFoldResult ShlOp::fold(ArrayRef operands) { + return foldBinaryOpUnchecked( + operands, [](const APInt &lhs, const APInt &rhs) -> Optional { + // We cannot fold if the RHS is greater than or equal to 32 because + // this would be UB in 32-bit systems but not on 64-bit systems. RHS is + // already treated as unsigned. + if (rhs.uge(32)) + return {}; + return lhs << rhs; + }); +} + +//===----------------------------------------------------------------------===// +// ShrSOp +//===----------------------------------------------------------------------===// + +OpFoldResult ShrSOp::fold(ArrayRef operands) { + return foldBinaryOpChecked( + operands, [](const APInt &lhs, const APInt &rhs) -> Optional { + // Don't fold if RHS is greater than or equal to 32. 
+ if (rhs.uge(32)) + return {}; + return lhs.ashr(rhs); + }); +} + +//===----------------------------------------------------------------------===// +// ShrUOp +//===----------------------------------------------------------------------===// + +OpFoldResult ShrUOp::fold(ArrayRef operands) { + return foldBinaryOpChecked( + operands, [](const APInt &lhs, const APInt &rhs) -> Optional { + // Don't fold if RHS is greater than or equal to 32. + if (rhs.uge(32)) + return {}; + return lhs.lshr(rhs); + }); +} + //===----------------------------------------------------------------------===// // CastSOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir b/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir index ee8e6629aa719..c6b2273fa1f3f 100644 --- a/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir +++ b/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir @@ -22,8 +22,14 @@ func.func @trivial_ops(%a: index, %b: index) { %7 = index.maxs %a, %b // CHECK: llvm.intr.umax %8 = index.maxu %a, %b + // CHECK: llvm.shl + %9 = index.shl %a, %b + // CHECK: llvm.ashr + %10 = index.shrs %a, %b + // CHECK: llvm.lshr + %11 = index.shru %a, %b // CHECK: llvm.mlir.constant(true - %9 = index.bool.constant true + %12 = index.bool.constant true return } diff --git a/mlir/test/Dialect/Index/index-canonicalize.mlir b/mlir/test/Dialect/Index/index-canonicalize.mlir index f9b33f88a1a26..288593f64c3f7 100644 --- a/mlir/test/Dialect/Index/index-canonicalize.mlir +++ b/mlir/test/Dialect/Index/index-canonicalize.mlir @@ -279,6 +279,111 @@ func.func @maxu() -> index { return %0 : index } +// CHECK-LABEL: @shl +func.func @shl() -> index { + %lhs = index.constant 128 + %rhs = index.constant 2 + // CHECK: %[[A:.*]] = index.constant 512 + %0 = index.shl %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shl_32 +func.func @shl_32() -> index { + %lhs = index.constant 1 + %rhs = 
index.constant 32 + // CHECK: index.shl + %0 = index.shl %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shl_edge +func.func @shl_edge() -> index { + %lhs = index.constant 4000000000 + %rhs = index.constant 31 + // CHECK: %[[A:.*]] = index.constant 858{{[0-9]+}} + %0 = index.shl %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shrs +func.func @shrs() -> index { + %lhs = index.constant 128 + %rhs = index.constant 2 + // CHECK: %[[A:.*]] = index.constant 32 + %0 = index.shrs %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shrs_32 +func.func @shrs_32() -> index { + %lhs = index.constant 4000000000000 + %rhs = index.constant 32 + // CHECK: index.shrs + %0 = index.shrs %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shrs_nofold +func.func @shrs_nofold() -> index { + %lhs = index.constant 0x100000000 + %rhs = index.constant 1 + // CHECK: index.shrs + %0 = index.shrs %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shrs_edge +func.func @shrs_edge() -> index { + %lhs = index.constant 0x10000000000 + %rhs = index.constant 3 + // CHECK: %[[A:.*]] = index.constant 137{{[0-9]+}} + %0 = index.shrs %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shru +func.func @shru() -> index { + %lhs = index.constant 128 + %rhs = index.constant 2 + // CHECK: %[[A:.*]] = index.constant 32 + %0 = index.shru %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shru_32 +func.func @shru_32() -> index { + %lhs = index.constant 4000000000000 + %rhs = index.constant 32 + // CHECK: index.shru + %0 = index.shru %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shru_nofold +func.func @shru_nofold() -> index { + %lhs = index.constant 0x100000000 + %rhs = index.constant 1 + // CHECK: index.shru + %0 = index.shru %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shru_edge +func.func @shru_edge() -> index { + %lhs = index.constant 0x10000000000 + %rhs = 
index.constant 3 + // CHECK: %[[A:.*]] = index.constant 137{{[0-9]+}} + %0 = index.shru %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + // CHECK-LABEL: @cmp func.func @cmp() -> (i1, i1, i1, i1) { %a = index.constant 0 diff --git a/mlir/test/Dialect/Index/index-ops.mlir b/mlir/test/Dialect/Index/index-ops.mlir index 2176efe337309..d1a409780cd51 100644 --- a/mlir/test/Dialect/Index/index-ops.mlir +++ b/mlir/test/Dialect/Index/index-ops.mlir @@ -27,6 +27,12 @@ func.func @binary_ops(%a: index, %b: index) { %10 = index.maxs %a, %b // CHECK-NEXT: index.maxu %[[A]], %[[B]] %11 = index.maxu %a, %b + // CHECK-NEXT: index.shl %[[A]], %[[B]] + %12 = index.shl %a, %b + // CHECK-NEXT: index.shrs %[[A]], %[[B]] + %13 = index.shrs %a, %b + // CHECK-NEXT: index.shru %[[A]], %[[B]] + %14 = index.shru %a, %b return } From c0725865b188f71f904ecd4dac56ef37268b30d2 Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Thu, 3 Nov 2022 12:44:10 +0000 Subject: [PATCH 202/516] [clang][dataflow] Generalize custom comparison to return tri-value result. Currently, the API for a model's custom value comparison returns a boolean. Therefore, models cannot distinguish between situations where the values are recognized by the model and different and those where the values are just not recognized. This patch changes the return value to a tri-valued enum, allowing models to express "don't know". This patch is essentially a NFC -- no practical differences result from this change in this patch. But, it prepares for future patches (particularly, upcoming patches for widening) which will take advantage of the new flexibility. 
Differential Revision: https://reviews.llvm.org/D137334 --- .../FlowSensitive/DataflowEnvironment.h | 27 +++++++++---- .../Models/UncheckedOptionalAccessModel.h | 6 +-- .../FlowSensitive/DataflowEnvironment.cpp | 4 +- .../Models/UncheckedOptionalAccessModel.cpp | 20 +++++----- .../TypeErasedDataflowAnalysisTest.cpp | 38 ++++++++++--------- 5 files changed, 56 insertions(+), 39 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index efea46b4a0c5b..e362d79263ff2 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -48,6 +48,13 @@ enum class SkipPast { ReferenceThenPointer, }; +/// Indicates the result of a tentative comparison. +enum class ComparisonResult { + Same, + Different, + Unknown, +}; + /// Holds the state of the program (store and heap) at a given program point. /// /// WARNING: Symbolic values that are created by the environment for static @@ -62,7 +69,11 @@ class Environment { public: virtual ~ValueModel() = default; - /// Returns true if and only if `Val1` is equivalent to `Val2`. + /// Returns: + /// `Same`: `Val1` is equivalent to `Val2`, according to the model. + /// `Different`: `Val1` is distinct from `Val2`, according to the model. + /// `Unknown`: The model can't determine a relationship between `Val1` and + /// `Val2`. /// /// Requirements: /// @@ -72,16 +83,16 @@ class Environment { /// /// `Val1` and `Val2` must be assigned to the same storage location in /// `Env1` and `Env2` respectively. 
- virtual bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) { + virtual ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) { // FIXME: Consider adding QualType to StructValue and removing the Type // argument here. // - // FIXME: default to a sound comparison and/or expand the comparison logic - // built into the framework to support broader forms of equivalence than - // strict pointer equality. - return true; + // FIXME: default to a sound comparison (`Unknown`) and/or expand the + // comparison logic built into the framework to support broader forms of + // equivalence than strict pointer equality. + return ComparisonResult::Same; } /// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could diff --git a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h index 66aabb531a213..b053a10327c3f 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h +++ b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h @@ -54,9 +54,9 @@ class UncheckedOptionalAccessModel void transfer(const CFGElement *Elt, NoopLattice &L, Environment &Env); - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override; + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override; bool merge(QualType Type, const Value &Val1, const Environment &Env1, const Value &Val2, const Environment &Env2, Value &MergedVal, diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 0b098c43ba3d1..ab1241d95eea0 100644 --- 
a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -295,8 +295,8 @@ bool Environment::equivalentTo(const Environment &Other, assert(It->second != nullptr); if (!areEquivalentValues(*Val, *It->second) && - !Model.compareEquivalent(Loc->getType(), *Val, *this, *It->second, - Other)) + Model.compare(Loc->getType(), *Val, *this, *It->second, Other) != + ComparisonResult::Same) return false; } diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 1ffd88697f3a7..1a41cfaa5fa13 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -208,7 +208,7 @@ QualType stripReference(QualType Type) { } /// Returns true if and only if `Type` is an optional type. -bool IsOptionalType(QualType Type) { +bool isOptionalType(QualType Type) { if (!Type->isRecordType()) return false; // FIXME: Optimize this by avoiding the `getQualifiedNameAsString` call. @@ -222,7 +222,7 @@ bool IsOptionalType(QualType Type) { /// For example, if `Type` is `optional>`, the result of this /// function will be 2. 
int countOptionalWrappers(const ASTContext &ASTCtx, QualType Type) { - if (!IsOptionalType(Type)) + if (!isOptionalType(Type)) return 0; return 1 + countOptionalWrappers( ASTCtx, @@ -720,12 +720,14 @@ void UncheckedOptionalAccessModel::transfer(const CFGElement *Elt, TransferMatchSwitch(*Elt, getASTContext(), State); } -bool UncheckedOptionalAccessModel::compareEquivalent(QualType Type, - const Value &Val1, - const Environment &Env1, - const Value &Val2, - const Environment &Env2) { - return isNonEmptyOptional(Val1, Env1) == isNonEmptyOptional(Val2, Env2); +ComparisonResult UncheckedOptionalAccessModel::compare( + QualType Type, const Value &Val1, const Environment &Env1, + const Value &Val2, const Environment &Env2) { + if (!isOptionalType(Type)) + return ComparisonResult::Unknown; + return isNonEmptyOptional(Val1, Env1) == isNonEmptyOptional(Val2, Env2) + ? ComparisonResult::Same + : ComparisonResult::Different; } bool UncheckedOptionalAccessModel::merge(QualType Type, const Value &Val1, @@ -734,7 +736,7 @@ bool UncheckedOptionalAccessModel::merge(QualType Type, const Value &Val1, const Environment &Env2, Value &MergedVal, Environment &MergedEnv) { - if (!IsOptionalType(Type)) + if (!isOptionalType(Type)) return true; auto &HasValueVal = MergedEnv.makeAtomicBoolValue(); diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index e36f207389e4c..8e0e27efae9e8 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -351,24 +351,27 @@ class SpecialBoolAnalysis final } } - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override { + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) 
override { const auto *Decl = Type->getAsCXXRecordDecl(); if (Decl == nullptr || Decl->getIdentifier() == nullptr || Decl->getName() != "SpecialBool") - return false; + return ComparisonResult::Unknown; auto *IsSet1 = cast_or_null(Val1.getProperty("is_set")); + auto *IsSet2 = cast_or_null(Val2.getProperty("is_set")); if (IsSet1 == nullptr) - return true; + return IsSet2 == nullptr ? ComparisonResult::Same + : ComparisonResult::Different; - auto *IsSet2 = cast_or_null(Val2.getProperty("is_set")); if (IsSet2 == nullptr) - return false; + return ComparisonResult::Different; return Env1.flowConditionImplies(*IsSet1) == - Env2.flowConditionImplies(*IsSet2); + Env2.flowConditionImplies(*IsSet2) + ? ComparisonResult::Same + : ComparisonResult::Different; } // Always returns `true` to accept the `MergedVal`. @@ -509,18 +512,19 @@ class OptionalIntAnalysis final } } - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override { + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override { // Nothing to say about a value that does not model an `OptionalInt`. if (!Type->isRecordType() || Type->getAsCXXRecordDecl()->getQualifiedNameAsString() != "OptionalInt") - return false; + return ComparisonResult::Unknown; auto *Prop1 = Val1.getProperty("has_value"); auto *Prop2 = Val2.getProperty("has_value"); assert(Prop1 != nullptr && Prop2 != nullptr); - return areEquivalentValues(*Prop1, *Prop2); + return areEquivalentValues(*Prop1, *Prop2) ? 
ComparisonResult::Same + : ComparisonResult::Different; } bool merge(QualType Type, const Value &Val1, const Environment &Env1, @@ -1182,12 +1186,12 @@ class TopAnalysis final : public DataflowAnalysis { } } - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override { + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override { // Changes to a sound approximation, which allows us to test whether we can // (soundly) converge for some loops. - return false; + return ComparisonResult::Unknown; } }; From 3c28a6d2cbf7faeecde6aee33dfc6b05ccfeb7a4 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 3 Nov 2022 16:13:54 -0700 Subject: [PATCH 203/516] [lld][WebAssembly] Add support for LLD_REPRODUCE This is a feature of the ELF and COFF linkers that the WebAssembly linker was missing until now. Differential Revision: https://reviews.llvm.org/D137377 --- lld/test/wasm/reproduce.ll | 27 --------------------------- lld/test/wasm/reproduce.s | 32 ++++++++++++++++++++++++++++++++ lld/wasm/Driver.cpp | 9 +++++++-- 3 files changed, 39 insertions(+), 29 deletions(-) delete mode 100644 lld/test/wasm/reproduce.ll create mode 100644 lld/test/wasm/reproduce.s diff --git a/lld/test/wasm/reproduce.ll b/lld/test/wasm/reproduce.ll deleted file mode 100644 index b00727e3b3e26..0000000000000 --- a/lld/test/wasm/reproduce.ll +++ /dev/null @@ -1,27 +0,0 @@ -; REQUIRES: shell -; RUN: rm -rf %t.dir -; RUN: mkdir -p %t.dir -; RUN: llc -filetype=obj %s -o %t.dir/foo.o -; RUN: wasm-ld --reproduce=%t.dir/repro.tar -o %t.dir/out.wasm %t.dir/foo.o - -; RUN: cd %t.dir -; RUN: tar tf repro.tar | FileCheck --check-prefix=TAR %s - -; TAR: repro/response.txt -; TAR: repro/version.txt -; TAR: repro/{{.*}}/foo.o - -; RUN: tar xf repro.tar -; RUN: FileCheck --check-prefix=RSP %s < repro/response.txt - -; RSP: -o {{.*}}out.wasm -; RSP: {{.*}}/foo.o - -; RUN: FileCheck 
%s --check-prefix=VERSION < repro/version.txt -; VERSION: LLD - -target triple = "wasm32-unknown-unknown" - -define void @_start() { - ret void -} diff --git a/lld/test/wasm/reproduce.s b/lld/test/wasm/reproduce.s new file mode 100644 index 0000000000000..a89843fe219e3 --- /dev/null +++ b/lld/test/wasm/reproduce.s @@ -0,0 +1,32 @@ +# REQUIRES: shell +# RUN: rm -rf %t.dir +# RUN: mkdir -p %t.dir +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.dir/foo.o %s +# RUN: wasm-ld --reproduce=%t.dir/repro.tar -o %t.dir/out.wasm %t.dir/foo.o +# RUN: env LLD_REPRODUCE=%t.dir/repro2.tar wasm-ld -o %t.dir/out.wasm %t.dir/foo.o + +# RUN: cd %t.dir +# RUN: tar tf repro.tar | FileCheck --check-prefix=TAR %s +# RUN: tar tf repro2.tar | FileCheck --check-prefix=TAR2 %s + +# TAR: repro/response.txt +# TAR: repro/version.txt +# TAR: repro/{{.*}}/foo.o + +# TAR2: repro2/response.txt +# TAR2: repro2/version.txt +# TAR2: repro2/{{.*}}/foo.o + +# RUN: tar xf repro.tar +# RUN: FileCheck --check-prefix=RSP %s < repro/response.txt + +# RSP: -o {{.*}}out.wasm +# RSP: {{.*}}/foo.o + +# RUN: FileCheck %s --check-prefix=VERSION < repro/version.txt +# VERSION: LLD + +.globl _start +_start: + .functype _start () -> () + end_function diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index db1ef5ffff778..62cd6192b01d8 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -618,6 +618,12 @@ static void checkOptions(opt::InputArgList &args) { } } +static const char *getReproduceOption(opt::InputArgList &args) { + if (auto *arg = args.getLastArg(OPT_reproduce)) + return arg->getValue(); + return getenv("LLD_REPRODUCE"); +} + // Force Sym to be entered in the output. Used for -u or equivalent. 
static Symbol *handleUndefined(StringRef name) { Symbol *sym = symtab->find(name); @@ -955,8 +961,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { } // Handle --reproduce - if (auto *arg = args.getLastArg(OPT_reproduce)) { - StringRef path = arg->getValue(); + if (const char *path = getReproduceOption(args)) { Expected> errOrWriter = TarWriter::create(path, path::stem(path)); if (errOrWriter) { From 2bbafe04fe785a9469bea5a3737f8d7d3ce4aca2 Mon Sep 17 00:00:00 2001 From: "Oleksandr \"Alex\" Zinenko" Date: Fri, 4 Nov 2022 01:03:01 +0100 Subject: [PATCH 204/516] Add TOC to GPU.md --- mlir/docs/Dialects/GPU.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md index fc2391dd1ed92..4b138ca23d5d6 100644 --- a/mlir/docs/Dialects/GPU.md +++ b/mlir/docs/Dialects/GPU.md @@ -12,6 +12,8 @@ manipulations to launch a GPU kernel and provide a simple path towards GPU execution from MLIR. It may be targeted, for example, by DSLs using MLIR. The dialect uses `gpu` as its canonical prefix. +[TOC] + ## Memory attribution Memory buffers are defined at the function level, either in "gpu.launch" or in From e3ccbae309273900a42e30b606c15c873d57f1ea Mon Sep 17 00:00:00 2001 From: Wanyi Ye Date: Sat, 10 Sep 2022 11:27:09 -0700 Subject: [PATCH 205/516] [lldb-vscode] Send Statistics Dump in terminated event This patch gathers debug info and breakpoint info from the statistics dump (returned by `SBTarget.GetStatistics()`) and sends it to the DAP client in the terminated event. The statistics content can be huge (especially the `modules`), and dumping it as full JSON can cause delays in the IDE's debugging UI. (For more details, please read: https://github.com/llvm/llvm-project/commit/7bbd0fba986c241162b77b7e424ad82bc7e17b41 ). Hence, we filter out large contents before returning the statistics in the terminated event. All the metadata fields (those that start with "total") are kept. For large contents, it uses an opt-out strategy. 
Currently it only removes the "modules" field. This way, every time a new top-level field is added, we will be able to capture it from the DAP log without changing lldb-vscode. The DAP terminated event should look like ``` { "event":"terminated", "seq":0, "statistics": { "memory": <JSON string>, "targets": <JSON string>, // it's a JSON array, breakpoints info included in each target <metadata key>: <value> // pairs }, "type":"event" } ``` All the info above will be appended to the statistics field in the terminated event. Test Plan Debugged a simple hello world program from VSCode. Exited the debug session in two ways: 1) run to program exit; 2) user initiated debug session end (quit debugging before program exit). Checked the DAP log and saw that both debug sessions have statistics returned in the terminated event. Here's an example when debugging the test program: ``` {"event":"terminated","seq":0,"statistics":{"memory":"{\"strings\":{\"bytesTotal\":1843200,\"bytesUnused\":897741,\"bytesUsed\":945459}}","targets":"[{\"breakpoints\":[{\"details\":{\"Breakpoint\":{\"BKPTOptions\":{\"AutoContinue\":false,\"ConditionText\":\"\",\"EnabledState\":true,\"IgnoreCount\":0,\"OneShotState\":false},\"BKPTResolver\":{\"Options\":{\"NameMask\":[56],\"Offset\":0,\"SkipPrologue\":true,\"SymbolNames\":[\"foo\"]},\"Type\":\"SymbolName\"},\"Hardware\":false,\"Names\":[\"vscode\"],\"SearchFilter\":{\"Options\":{},\"Type\":\"Unconstrained\"}}},\"id\":1,\"internal\":false,\"numLocations\":1,\"numResolvedLocations\":1,\"resolveTime\":0.002232},{\"details\":{\"Breakpoint\":{\"BKPTOptions\":{\"AutoContinue\":false,\"ConditionText\":\"\",\"EnabledState\":true,\"IgnoreCount\":0,\"OneShotState\":false},\"BKPTResolver\":{\"Options\":{\"Column\":0,\"Exact\":false,\"FileName\":\"/data/users/wanyi/llvm-sand/external/llvm-project/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp\",\"Inlines\":true,\"LineNumber\":5,\"Offset\":0,\"SkipPrologue\":true},\"Type\":\"FileAndLine\"},\"Hardware\":false,\"Names\":[\"vscode\"],\"SearchFilter\":{\"Options\":{},\"Type\":\"Unconst
rained\"}}},\"id\":2,\"internal\":false,\"numLocations\":0,\"numResolvedLocations\":0,\"resolveTime\":0.23203799999999999},{\"details\":{\"Breakpoint\":{\"BKPTOptions\":{\"AutoContinue\":false,\"ConditionText\":\"\",\"EnabledState\":true,\"IgnoreCount\":0,\"OneShotState\":false},\"BKPTResolver\":{\"Options\":{\"Language\":\"c\",\"NameMask\":[4,4,4,4,4,4],\"Offset\":0,\"SkipPrologue\":false,\"SymbolNames\":[\"_dl_debug_state\",\"rtld_db_dlactivity\",\"__dl_rtld_db_dlactivity\",\"r_debug_state\",\"_r_debug_state\",\"_rtld_debug_state\"]},\"Type\":\"SymbolName\"},\"Hardware\":false,\"SearchFilter\":{\"Options\":{\"ModuleList\":[\"/usr/lib64/ld-2.28.so\"]},\"Type\":\"Modules\"}}},\"id\":-1,\"internal\":true,\"kindDescription\":\"shared-library-event\",\"numLocations\":1,\"numResolvedLocations\":1,\"resolveTime\":0.00026699999999999998}],\"expressionEvaluation\":{\"failures\":0,\"successes\":0},\"firstStopTime\":0.087458974999999994,\"frameVariable\":{\"failures\":0,\"successes\":0},\"launchOrAttachTime\":0.052953161999999998,\"moduleIdentifiers\":[94554748126576,94554747837792,94554747149216,139800112130176,139800112161056,139800112206064,139800112340224,139800112509552,139800112236528],\"signals\":[{\"SIGSTOP\":1}],\"sourceMapDeduceCount\":0,\"stopCount\":8,\"targetCreateTime\":0.00057700000000000004,\"totalBreakpointResolveTime\":0.234537}]","totalDebugInfoByteSize":1668056,"totalDebugInfoEnabled":3,"totalDebugInfoIndexLoadedFromCache":0,"totalDebugInfoIndexSavedToCache":0,"totalDebugInfoIndexTime":0.027963000000000002,"totalDebugInfoParseTime":0.34354800000000002,"totalModuleCount":10,"totalModuleCountHasDebugInfo":3,"totalSymbolTableIndexTime":0.056050000000000003,"totalSymbolTableParseTime":0.23930000000000001,"totalSymbolTableStripped":0,"totalSymbolTablesLoadedFromCache":0,"totalSymbolTablesSavedToCache":0},"type":"event"} ``` Differential Revision: https://reviews.llvm.org/D137003 --- .../test/tools/lldb-vscode/vscode.py | 8 ++- 
.../lldb-vscode/terminated-event/Makefile | 17 +++++ .../TestVSCode_terminatedEvent.py | 63 +++++++++++++++++ .../lldb-vscode/terminated-event/foo.cpp | 3 + .../tools/lldb-vscode/terminated-event/foo.h | 1 + .../lldb-vscode/terminated-event/main.cpp | 8 +++ lldb/tools/lldb-vscode/JSONUtils.cpp | 69 +++++++++++++++++++ lldb/tools/lldb-vscode/JSONUtils.h | 6 ++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 4 +- 9 files changed, 176 insertions(+), 3 deletions(-) create mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/Makefile create mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py create mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp create mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/foo.h create mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index d6a6abca53e38..c2de4ad5c7d9a 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -369,7 +369,13 @@ def wait_for_stopped(self, timeout=None): def wait_for_exited(self): event_dict = self.wait_for_event('exited') if event_dict is None: - raise ValueError("didn't get stopped event") + raise ValueError("didn't get exited event") + return event_dict + + def wait_for_terminated(self): + event_dict = self.wait_for_event('terminated') + if event_dict is None: + raise ValueError("didn't get terminated event") return event_dict def get_initialize_value(self, key): diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/Makefile b/lldb/test/API/tools/lldb-vscode/terminated-event/Makefile new file mode 100644 index 0000000000000..b30baf48b972e --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/terminated-event/Makefile @@ -0,0 +1,17 @@ +DYLIB_NAME := foo +DYLIB_CXX_SOURCES := foo.cpp 
+CXX_SOURCES := main.cpp + +LD_EXTRAS := -Wl,-rpath "-Wl,$(shell pwd)" +USE_LIBDL :=1 + +include Makefile.rules + +all: a.out.stripped + +a.out.stripped: + strip -o a.out.stripped a.out + +ifneq "$(CODESIGN)" "" + $(CODESIGN) -fs - a.out.stripped +endif \ No newline at end of file diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py b/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py new file mode 100644 index 0000000000000..bc516a0ed0e37 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py @@ -0,0 +1,63 @@ +""" +Test lldb-vscode terminated event +""" + +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase +import re +import json + +class TestVSCode_terminatedEvent(lldbvscode_testcase.VSCodeTestCaseBase): + + @skipIfWindows + @skipIfRemote + def test_terminated_event(self): + ''' + Terminated Event + Now contains the statistics of a debug session: + metatdata: + totalDebugInfoByteSize > 0 + totalDebugInfoEnabled > 0 + totalModuleCountHasDebugInfo > 0 + ... 
+ targetInfo: + totalBreakpointResolveTime > 0 + breakpoints: + recognize function breakpoint + recognize source line breakpoint + It should contains the breakpoints info: function bp & source line bp + ''' + + program_basename = "a.out.stripped" + program = self.getBuildArtifact(program_basename) + self.build_and_launch(program) + # Set breakpoints + functions = ['foo'] + breakpoint_ids = self.set_function_breakpoints(functions) + self.assertEquals(len(breakpoint_ids), len(functions), 'expect one breakpoint') + main_bp_line = line_number('main.cpp', '// main breakpoint 1') + breakpoint_ids.append(self.set_source_breakpoints('main.cpp', [main_bp_line])) + + self.continue_to_breakpoints(breakpoint_ids) + self.continue_to_exit() + + statistics = self.vscode.wait_for_terminated()['statistics'] + self.assertTrue(statistics['totalDebugInfoByteSize'] > 0) + self.assertTrue(statistics['totalDebugInfoEnabled'] > 0) + self.assertTrue(statistics['totalModuleCountHasDebugInfo'] > 0) + + self.assertIsNotNone(statistics['memory']) + + # lldb-vscode debugs one target at a time + target = json.loads(statistics['targets'])[0] + self.assertTrue(target['totalBreakpointResolveTime'] > 0) + + breakpoints = target['breakpoints'] + self.assertIn('foo', + breakpoints[0]['details']['Breakpoint']['BKPTResolver']['Options']['SymbolNames'], + 'foo is a symbol breakpoint') + self.assertTrue(breakpoints[1]['details']['Breakpoint']['BKPTResolver']['Options']['FileName'].endswith('main.cpp'), + 'target has source line breakpoint in main.cpp') diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp b/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp new file mode 100644 index 0000000000000..9dba85a9cccab --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp @@ -0,0 +1,3 @@ +int foo() { + return 12; +} diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.h b/lldb/test/API/tools/lldb-vscode/terminated-event/foo.h new file mode 100644 index 
0000000000000..5d5f8f0c9e786 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/terminated-event/foo.h @@ -0,0 +1 @@ +int foo(); diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp b/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp new file mode 100644 index 0000000000000..cd984e560e0d2 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp @@ -0,0 +1,8 @@ +#include +#include "foo.h" + +int main(int argc, char const *argv[]) { + std::cout << "Hello World!" << std::endl; // main breakpoint 1 + foo(); + return 0; +} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 39c24f8b23e39..bd8a9148c241f 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -19,6 +19,8 @@ #include "lldb/API/SBBreakpoint.h" #include "lldb/API/SBBreakpointLocation.h" #include "lldb/API/SBDeclaration.h" +#include "lldb/API/SBStringList.h" +#include "lldb/API/SBStructuredData.h" #include "lldb/API/SBValue.h" #include "lldb/Host/PosixApi.h" @@ -1140,6 +1142,73 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, return reverse_request; } +// Keep all the top level items from the statistics dump, except for the +// "modules" array. 
It can be huge and cause delay +// Array and dictionary value will return as pairs +void FilterAndGetValueForKey(const lldb::SBStructuredData data, const char *key, + llvm::json::Object &out) { + lldb::SBStructuredData value = data.GetValueForKey(key); + std::string key_utf8 = llvm::json::fixUTF8(key); + if (strcmp(key, "modules") == 0) + return; + switch (value.GetType()) { + case lldb::eStructuredDataTypeFloat: + out.try_emplace(key_utf8, value.GetFloatValue()); + break; + case lldb::eStructuredDataTypeInteger: + out.try_emplace(key_utf8, value.GetIntegerValue()); + break; + case lldb::eStructuredDataTypeArray: { + lldb::SBStream contents; + value.GetAsJSON(contents); + EmplaceSafeString(out, key, contents.GetData()); + } break; + case lldb::eStructuredDataTypeBoolean: + out.try_emplace(key_utf8, value.GetBooleanValue()); + break; + case lldb::eStructuredDataTypeString: { + // Get the string size before reading + const size_t str_length = value.GetStringValue(nullptr, 0); + std::string str(str_length + 1, 0); + value.GetStringValue(&str[0], str_length); + EmplaceSafeString(out, key, str); + } break; + case lldb::eStructuredDataTypeDictionary: { + lldb::SBStream contents; + value.GetAsJSON(contents); + EmplaceSafeString(out, key, contents.GetData()); + } break; + case lldb::eStructuredDataTypeNull: + case lldb::eStructuredDataTypeGeneric: + case lldb::eStructuredDataTypeInvalid: + break; + } +} + +void addStatistic(llvm::json::Object &event) { + lldb::SBStructuredData statistics = g_vsc.target.GetStatistics(); + bool is_dictionary = + statistics.GetType() == lldb::eStructuredDataTypeDictionary; + if (!is_dictionary) + return; + llvm::json::Object stats_body; + + lldb::SBStringList keys; + if (!statistics.GetKeys(keys)) + return; + for (size_t i = 0; i < keys.GetSize(); i++) { + const char *key = keys.GetStringAtIndex(i); + FilterAndGetValueForKey(statistics, key, stats_body); + } + event.try_emplace("statistics", std::move(stats_body)); +} + +llvm::json::Object 
CreateTerminatedEventObject() { + llvm::json::Object event(CreateEventObject("terminated")); + addStatistic(event); + return event; +} + std::string JSONToString(const llvm::json::Value &json) { std::string data; llvm::raw_string_ostream os(data); diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index bb81b88895938..c812ec87beab0 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -485,6 +485,12 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, llvm::StringRef debug_adaptor_path, llvm::StringRef comm_file); +/// Create a "Terminated" JSON object that contains statistics +/// +/// \return +/// A body JSON object with debug info and breakpoint info +llvm::json::Object CreateTerminatedEventObject(); + /// Convert a given JSON object to a string. std::string JSONToString(const llvm::json::Value &json); diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 1c6f9c829c388..21d2bc2229043 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -204,7 +204,7 @@ void SendTerminatedEvent() { g_vsc.sent_terminated_event = true; g_vsc.RunTerminateCommands(); // Send a "terminated" event - llvm::json::Object event(CreateEventObject("terminated")); + llvm::json::Object event(CreateTerminatedEventObject()); g_vsc.SendJSON(llvm::json::Value(std::move(event))); } } @@ -2949,7 +2949,7 @@ void request_variables(const llvm::json::Object &request) { const uint32_t addr_size = g_vsc.target.GetProcess().GetAddressByteSize(); lldb::SBValue reg_set = g_vsc.variables.registers.GetValueAtIndex(0); const uint32_t num_regs = reg_set.GetNumChildren(); - for (uint32_t reg_idx=0; reg_idx Date: Fri, 4 Nov 2022 09:04:57 +0800 Subject: [PATCH 206/516] [X86] Support -march=raptorlake, meteorlake Reviewed By: pengfei, skan, MaskRay Differential Revision: https://reviews.llvm.org/D135937 --- 
clang/docs/ReleaseNotes.rst | 1 + clang/lib/Basic/Targets/X86.cpp | 5 +++++ clang/lib/CodeGen/CGBuiltin.cpp | 2 ++ clang/test/CodeGen/attr-target-mv.c | 10 ++++++++++ clang/test/CodeGen/target-builtin-noerror.c | 2 ++ clang/test/Driver/x86-march.c | 8 ++++++++ clang/test/Misc/target-invalid-cpu-note.c | 8 ++++---- clang/test/Preprocessor/predefined-arch-macros.c | 12 ++++++++++++ llvm/docs/ReleaseNotes.rst | 1 + llvm/include/llvm/Support/X86TargetParser.def | 10 ++++++++++ llvm/include/llvm/Support/X86TargetParser.h | 2 ++ llvm/lib/Support/Host.cpp | 6 ++++++ llvm/lib/Support/X86TargetParser.cpp | 4 ++++ llvm/lib/Target/X86/X86.td | 4 ++++ llvm/test/CodeGen/X86/cpus-intel.ll | 4 ++++ 15 files changed, 75 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 73d7aff9b8910..ad1a00b4bbcc4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -705,6 +705,7 @@ X86 Support in Clang * Support intrinsic of ``_mm(256)_cvtneobf16_ps``. * Support intrinsic of ``_mm(256)_cvtneoph_ps``. * Support intrinsic of ``_mm(256)_cvtneps_avx_pbh``. +- ``-march=raptorlake`` and ``-march=meteorlake`` are now supported. WebAssembly Support in Clang ---------------------------- diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 2d3f3d10c5716..a33a6f06c0182 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -524,6 +524,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_Tigerlake: case CK_SapphireRapids: case CK_Alderlake: + case CK_Raptorlake: + case CK_Meteorlake: // FIXME: Historically, we defined this legacy name, it would be nice to // remove it at some point. We've never exposed fine-grained names for // recent primary x86 CPUs, and we should keep it that way. 
@@ -1194,6 +1196,7 @@ bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const { #define X86_VENDOR(ENUM, STRING) .Case(STRING, true) #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) #define X86_CPU_TYPE(ENUM, STR) .Case(STR, true) +#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) #define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true) #include "llvm/Support/X86TargetParser.def" .Default(false); @@ -1408,6 +1411,8 @@ Optional X86TargetInfo::getCPUCacheLineSize() const { case CK_Rocketlake: case CK_IcelakeServer: case CK_Alderlake: + case CK_Raptorlake: + case CK_Meteorlake: case CK_KNL: case CK_KNM: // K7 diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8bb5626392bd3..b0da121340556 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -12935,6 +12935,8 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { .Case(ALIAS, {1u, static_cast(llvm::X86::ENUM)}) #define X86_CPU_TYPE(ENUM, STR) \ .Case(STR, {1u, static_cast(llvm::X86::ENUM)}) +#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \ + .Case(ALIAS, {2u, static_cast(llvm::X86::ENUM)}) #define X86_CPU_SUBTYPE(ENUM, STR) \ .Case(STR, {2u, static_cast(llvm::X86::ENUM)}) #include "llvm/Support/X86TargetParser.def" diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c index e5241a1bbe54e..581f18e10b081 100644 --- a/clang/test/CodeGen/attr-target-mv.c +++ b/clang/test/CodeGen/attr-target-mv.c @@ -15,6 +15,8 @@ int __attribute__((target("arch=sapphirerapids"))) foo(void) {return 10;} int __attribute__((target("arch=alderlake"))) foo(void) {return 11;} int __attribute__((target("arch=rocketlake"))) foo(void) {return 12;} int __attribute__((target("arch=core2"))) foo(void) {return 13;} +int __attribute__((target("arch=raptorlake"))) foo(void) {return 14;} +int __attribute__((target("arch=meteorlake"))) foo(void) {return 15;} int __attribute__((target("default"))) foo(void) { return 2; } int bar(void) { 
@@ -149,6 +151,10 @@ void calls_pr50025c(void) { pr50025c(); } // LINUX: ret i32 12 // LINUX: define{{.*}} i32 @foo.arch_core2() // LINUX: ret i32 13 +// LINUX: define{{.*}} i32 @foo.arch_raptorlake() +// LINUX: ret i32 14 +// LINUX: define{{.*}} i32 @foo.arch_meteorlake() +// LINUX: ret i32 15 // LINUX: define{{.*}} i32 @foo() // LINUX: ret i32 2 // LINUX: define{{.*}} i32 @bar() @@ -180,6 +186,10 @@ void calls_pr50025c(void) { pr50025c(); } // WINDOWS: ret i32 12 // WINDOWS: define dso_local i32 @foo.arch_core2() // WINDOWS: ret i32 13 +// WINDOWS: define dso_local i32 @foo.arch_raptorlake() +// WINDOWS: ret i32 14 +// WINDOWS: define dso_local i32 @foo.arch_meteorlake() +// WINDOWS: ret i32 15 // WINDOWS: define dso_local i32 @foo() // WINDOWS: ret i32 2 // WINDOWS: define dso_local i32 @bar() diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index b1159aeacf90b..fc3d9ef8e572e 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -115,7 +115,9 @@ void verifycpustrings(void) { (void)__builtin_cpu_is("ivybridge"); (void)__builtin_cpu_is("knl"); (void)__builtin_cpu_is("knm"); + (void)__builtin_cpu_is("meteorlake"); (void)__builtin_cpu_is("nehalem"); + (void)__builtin_cpu_is("raptorlake"); (void)__builtin_cpu_is("rocketlake"); (void)__builtin_cpu_is("sandybridge"); (void)__builtin_cpu_is("shanghai"); diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c index 46d8d5da32356..6b8dcd79faffc 100644 --- a/clang/test/Driver/x86-march.c +++ b/clang/test/Driver/x86-march.c @@ -88,6 +88,14 @@ // RUN: | FileCheck %s -check-prefix=alderlake // alderlake: "-target-cpu" "alderlake" // +// RUN: %clang --target=x86_64 -c -### %s -march=raptorlake 2>&1 \ +// RUN: | FileCheck %s -check-prefix=raptorlake +// raptorlake: "-target-cpu" "raptorlake" +// +// RUN: %clang --target=x86_64 -c -### %s -march=meteorlake 2>&1 \ +// RUN: | FileCheck %s 
-check-prefix=meteorlake +// meteorlake: "-target-cpu" "meteorlake" +// // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=lakemont 2>&1 \ // RUN: | FileCheck %s -check-prefix=lakemont // lakemont: "-target-cpu" "lakemont" diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index c0b542086a752..1ac86709cab3f 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -13,19 +13,19 @@ // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' -// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} +// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, 
cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 // X86_64: error: unknown target CPU 'not-a-cpu' -// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} +// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 // TUNE_X86: error: unknown 
target CPU 'not-a-cpu' -// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} +// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu' -// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, 
winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} +// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 0ffa2739e5df4..10f0987a00399 100644 --- 
a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -1791,6 +1791,12 @@ // RUN: %clang -march=alderlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32 +// RUN: %clang -march=raptorlake -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32 +// RUN: %clang -march=meteorlake -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32 // CHECK_ADL_M32: #define __ADX__ 1 // CHECK_ADL_M32: #define __AES__ 1 // CHECK_ADL_M32: #define __AVX2__ 1 @@ -1853,6 +1859,12 @@ // RUN: %clang -march=alderlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64 +// RUN: %clang -march=raptorlake -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64 +// RUN: %clang -march=meteorlake -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64 // CHECK_ADL_M64: #define __ADX__ 1 // CHECK_ADL_M64: #define __AES__ 1 // CHECK_ADL_M64: #define __AVX2__ 1 diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index ba9213557e935..d1a9cac4c31a4 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -148,6 +148,7 @@ Changes to the X86 Backend * Support ISA of ``AVX-IFMA``. * Support ISA of ``AVX-VNNI-INT8``. * Support ISA of ``AVX-NE-CONVERT``. +* ``-mcpu=raptorlake`` and ``-mcpu=meteorlake`` are now supported. 
Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 6b6c740a9b1f8..8ffcc2152b1f1 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -60,6 +60,10 @@ X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm") #define X86_CPU_SUBTYPE(ENUM, STR) #endif +#ifndef X86_CPU_SUBTYPE_ALIAS +#define X86_CPU_SUBTYPE_ALIAS(ENUM, STR) +#endif + // This list must match what is implemented in libgcc and compilert-rt. Clang // uses this to know how to implement __builtin_cpu_is. X86_CPU_SUBTYPE(INTEL_COREI7_NEHALEM, "nehalem") @@ -89,6 +93,12 @@ X86_CPU_SUBTYPE(INTEL_COREI7_SAPPHIRERAPIDS, "sapphirerapids") X86_CPU_SUBTYPE(INTEL_COREI7_ALDERLAKE, "alderlake") X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3") X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake") + +// Alternate names supported by __builtin_cpu_is and target multiversioning. +X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake") +X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "meteorlake") + +#undef X86_CPU_SUBTYPE_ALIAS #undef X86_CPU_SUBTYPE // This macro is used for cpu types present in compiler-rt/libgcc. 
The third diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h index 612046f3b2d9c..922be6d2e508d 100644 --- a/llvm/include/llvm/Support/X86TargetParser.h +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -104,6 +104,8 @@ enum CPUKind { CK_Tigerlake, CK_SapphireRapids, CK_Alderlake, + CK_Raptorlake, + CK_Meteorlake, CK_KNL, CK_KNM, CK_Lakemont, diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index d4495b387bde6..bd8a206b84448 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -816,6 +816,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // Alderlake: case 0x97: case 0x9a: + // Raptorlake: + case 0xb7: + // Meteorlake: + case 0xb5: + case 0xaa: + case 0xac: CPU = "alderlake"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_ALDERLAKE; diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index 20bcfb3b9094a..7fcbb2108eea5 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -370,6 +370,10 @@ constexpr ProcInfo Processors[] = { { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids }, // Alderlake microarchitecture based processors. { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake }, + // Raptorlake microarchitecture based processors. + { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake }, + // Meteorlake microarchitecture based processors. + { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake }, // Knights Landing processor. { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL }, // Knights Mill processor. 
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index b137529269bc8..3926c47882bb2 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1502,6 +1502,10 @@ def : ProcModel<"sapphirerapids", SkylakeServerModel, ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>; def : ProcModel<"alderlake", AlderlakePModel, ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>; +def : ProcModel<"raptorlake", AlderlakePModel, + ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>; +def : ProcModel<"meteorlake", AlderlakePModel, + ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>; // AMD CPUs. diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll index f6be71d26100e..e4e24903319c4 100644 --- a/llvm/test/CodeGen/X86/cpus-intel.ll +++ b/llvm/test/CodeGen/X86/cpus-intel.ll @@ -17,6 +17,8 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty @@ -52,6 +54,8 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR 
--allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void From de865087f8c07c742dec3523ec8cbe6895bd57ac Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Fri, 4 Nov 2022 10:26:31 +0800 Subject: [PATCH 207/516] [LLDB] [LoongArch] Add loongarch64 case in ComputeHostArchitectureSupport() This is a simple change, loongarch64 host also supports 32-bit binaries, so note it. Without this patch: ``` [loongson@linux build]$ ./tools/lldb/unittests/Host/HostTests | tail -6 [==========] 78 tests from 18 test suites ran. (16 ms total) [ PASSED ] 77 tests. [ FAILED ] 1 test, listed below: [ FAILED ] HostTest.GetProcessInfo 1 FAILED TEST ``` With this patch: ``` [loongson@linux build]$ ./tools/lldb/unittests/Host/HostTests | tail -2 [==========] 78 tests from 18 test suites ran. (15 ms total) [ PASSED ] 78 tests. 
``` Reviewed By: xen0n, MaskRay, DavidSpickett Differential Revision: https://reviews.llvm.org/D137312 --- lldb/source/Host/common/HostInfoBase.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/Host/common/HostInfoBase.cpp b/lldb/source/Host/common/HostInfoBase.cpp index e8088344422a7..9a7b77c19de1d 100644 --- a/lldb/source/Host/common/HostInfoBase.cpp +++ b/lldb/source/Host/common/HostInfoBase.cpp @@ -340,6 +340,7 @@ void HostInfoBase::ComputeHostArchitectureSupport(ArchSpec &arch_32, case llvm::Triple::ppc64le: case llvm::Triple::x86_64: case llvm::Triple::riscv64: + case llvm::Triple::loongarch64: arch_64.SetTriple(triple); arch_32.SetTriple(triple.get32BitArchVariant()); break; From b3578f33c8b0a05070661565af54352fe3381f6d Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Fri, 4 Nov 2022 10:38:10 +0800 Subject: [PATCH 208/516] Revert "[LLDB] [LoongArch] Add loongarch64 case in ComputeHostArchitectureSupport()" This reverts commit de865087f8c07c742dec3523ec8cbe6895bd57ac. Reason to revert: author name is wrong. --- lldb/source/Host/common/HostInfoBase.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/source/Host/common/HostInfoBase.cpp b/lldb/source/Host/common/HostInfoBase.cpp index 9a7b77c19de1d..e8088344422a7 100644 --- a/lldb/source/Host/common/HostInfoBase.cpp +++ b/lldb/source/Host/common/HostInfoBase.cpp @@ -340,7 +340,6 @@ void HostInfoBase::ComputeHostArchitectureSupport(ArchSpec &arch_32, case llvm::Triple::ppc64le: case llvm::Triple::x86_64: case llvm::Triple::riscv64: - case llvm::Triple::loongarch64: arch_64.SetTriple(triple); arch_32.SetTriple(triple.get32BitArchVariant()); break; From 8cca7f3bf741287eb21ce273106244349a03345a Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 4 Nov 2022 10:39:57 +0800 Subject: [PATCH 209/516] Reland "[LLDB] [LoongArch] Add loongarch64 case in ComputeHostArchitectureSupport()" This is a simple change, loongarch64 host also supports 32-bit binaries, so note it. 
Without this patch: ``` [loongson@linux build]$ ./tools/lldb/unittests/Host/HostTests | tail -6 [==========] 78 tests from 18 test suites ran. (16 ms total) [ PASSED ] 77 tests. [ FAILED ] 1 test, listed below: [ FAILED ] HostTest.GetProcessInfo 1 FAILED TEST ``` With this patch: ``` [loongson@linux build]$ ./tools/lldb/unittests/Host/HostTests | tail -2 [==========] 78 tests from 18 test suites ran. (15 ms total) [ PASSED ] 78 tests. ``` Reviewed By: SixWeining, xen0n, MaskRay, DavidSpickett Differential Revision: https://reviews.llvm.org/D137312 --- lldb/source/Host/common/HostInfoBase.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/Host/common/HostInfoBase.cpp b/lldb/source/Host/common/HostInfoBase.cpp index e8088344422a7..9a7b77c19de1d 100644 --- a/lldb/source/Host/common/HostInfoBase.cpp +++ b/lldb/source/Host/common/HostInfoBase.cpp @@ -340,6 +340,7 @@ void HostInfoBase::ComputeHostArchitectureSupport(ArchSpec &arch_32, case llvm::Triple::ppc64le: case llvm::Triple::x86_64: case llvm::Triple::riscv64: + case llvm::Triple::loongarch64: arch_64.SetTriple(triple); arch_32.SetTriple(triple.get32BitArchVariant()); break; From e419620fc2f55cbc2decd468778376ec08d0190d Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Fri, 4 Nov 2022 10:15:11 +0800 Subject: [PATCH 210/516] [CodeGenPrep] Change ValueToSExts from DenseMap to MapVector mergeSExts iterates through ValueToSExts. Using DenseMap results in an unstable optimization path, so the output IR may vary even if the input IR is the same. 
Reviewed By: wxiao3 Differential Revision: https://reviews.llvm.org/D137234 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b9ffb8a8318fb..cf2b32c74eb5a 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -287,7 +287,7 @@ using SetOfInstrs = SmallPtrSet; using TypeIsSExt = PointerIntPair; using InstrToOrigTy = DenseMap; using SExts = SmallVector; -using ValueToSExts = DenseMap; +using ValueToSExts = MapVector; class TypePromotionTransaction; From 75b33d6bd518f6502a63f96e79c0f4be3691b1d5 Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Fri, 4 Nov 2022 00:07:42 -0400 Subject: [PATCH 211/516] [LoopInterchange] Check phis in all subloops This is the bugfix to the miscompile mentioned in https://reviews.llvm.org/D132055#3814831. The IR that reproduced the bug is added as the test case in this patch. During the legality phase, instead of checking the phi nodes only in `InnerLoop` and `OuterLoop`, this patch checks the phi nodes in all subloops of `OuterLoop`. For example, if the loop nest is triply nested, and `InnerLoop` and `OuterLoop` are the middle loop and the outermost loop respectively, we'll check the phi nodes in the innermost loop as well, in addition to the ones in the middle and outermost loops. 
Reviewed By: Meinersbur, #loopoptwg Differential Revision: https://reviews.llvm.org/D134930 --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 32 +++++--- .../multilevel-partial-reduction.ll | 77 +++++++++++++++++++ 2 files changed, 97 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 3cf35172b7499..05b807b57b4e1 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -831,18 +831,26 @@ bool LoopInterchangeLegality::currentLimitations() { } Inductions.clear(); - if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) { - LLVM_DEBUG( - dbgs() << "Only inner loops with induction or reduction PHI nodes " - << "are supported currently.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with induction or reduction PHI nodes can be" - " interchange currently."; - }); - return true; + // For multi-level loop nests, make sure that all phi nodes for inner loops + // at all levels can be recognized as a induction or reduction phi. Bail out + // if a phi node at a certain nesting level cannot be properly recognized. + Loop *CurLevelLoop = OuterLoop; + while (!CurLevelLoop->getSubLoops().empty()) { + // We already made sure that the loop nest is tightly nested. 
+ CurLevelLoop = CurLevelLoop->getSubLoops().front(); + if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) { + LLVM_DEBUG( + dbgs() << "Only inner loops with induction or reduction PHI nodes " + << "are supported currently.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", + CurLevelLoop->getStartLoc(), + CurLevelLoop->getHeader()) + << "Only inner loops with induction or reduction PHI nodes can be" + " interchange currently."; + }); + return true; + } } // TODO: Triangular loops are not handled for now. diff --git a/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll b/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll new file mode 100644 index 0000000000000..5c40ee5b00e5c --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll @@ -0,0 +1,77 @@ +; RUN: opt < %s -loop-interchange -cache-line-size=4 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \ +; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa +; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s + +@b = external global [512 x [4 x i32]] +@c = global [2 x [4 x i32]] zeroinitializer, align 1 + +; Check that the outermost and the middle loops are not interchanged since +; the innermost loop has a reduction operation which is however not in a form +; that loop interchange can handle. Interchanging the outermost and the +; middle loops would intervene with the reduction and cause miscompile. 
+ +; REMARKS: --- !Missed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: UnsupportedPHIInner +; REMARKS-NEXT: Function: test7 +; REMARKS: --- !Missed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: UnsupportedPHIInner +; REMARKS-NEXT: Function: test7 + +define i32 @test7() { +entry: + br label %for.cond1.preheader.i + +for.cond1.preheader.i: ; preds = %for.inc19.i, %entry + %i.011.i = phi i16 [ 0, %entry ], [ %inc20.i, %for.inc19.i ] + br label %for.cond4.preheader.i + +for.cond4.preheader.i: ; preds = %middle.block, %for.cond1.preheader.i + %j.010.i = phi i16 [ 0, %for.cond1.preheader.i ], [ %inc17.i, %middle.block ] + %arrayidx14.i = getelementptr inbounds [2 x [4 x i32]], ptr @c, i16 0, i16 %i.011.i, i16 %j.010.i + %arrayidx14.promoted.i = load i32, ptr %arrayidx14.i, align 1 + %0 = insertelement <4 x i32> , i32 %arrayidx14.promoted.i, i64 0 + br label %vector.body + +vector.body: ; preds = %vector.body, %for.cond4.preheader.i + %index = phi i16 [ 0, %for.cond4.preheader.i ], [ %index.next, %vector.body ] + %vec.phi = phi <4 x i32> [ %0, %for.cond4.preheader.i ], [ %16, %vector.body ] + %1 = or i16 %index, 1 + %2 = or i16 %index, 2 + %3 = or i16 %index, 3 + %4 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %index, i16 %j.010.i + %5 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %1, i16 %j.010.i + %6 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %2, i16 %j.010.i + %7 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %3, i16 %j.010.i + %8 = load i32, ptr %4, align 1 + %9 = load i32, ptr %5, align 1 + %10 = load i32, ptr %6, align 1 + %11 = load i32, ptr %7, align 1 + %12 = insertelement <4 x i32> poison, i32 %8, i64 0 + %13 = insertelement <4 x i32> %12, i32 %9, i64 1 + %14 = insertelement <4 x i32> %13, i32 %10, i64 2 + %15 = insertelement <4 x i32> %14, i32 %11, i64 3 + %16 = add <4 x i32> %15, %vec.phi + %index.next = add nuw i16 %index, 4 + %17 = icmp eq i16 
%index.next, 512 + br i1 %17, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %18 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %16) + store i32 %18, ptr %arrayidx14.i, align 1 + %inc17.i = add nuw nsw i16 %j.010.i, 1 + %exitcond12.not.i = icmp eq i16 %inc17.i, 4 + br i1 %exitcond12.not.i, label %for.inc19.i, label %for.cond4.preheader.i + +for.inc19.i: ; preds = %middle.block + %inc20.i = add nuw nsw i16 %i.011.i, 1 + %exitcond13.not.i = icmp eq i16 %inc20.i, 2 + br i1 %exitcond13.not.i, label %test.exit, label %for.cond1.preheader.i + +test.exit: ; preds = %for.inc19.i + %19 = load i32, ptr @c, align 1 + ret i32 %19 +} + +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) From 52516782972730ff065a34123a9d8876da08c254 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 4 Nov 2022 11:21:28 +0800 Subject: [PATCH 212/516] [X86][CET] Add Diags for targets pre to i686 for `-fcf-protection` Intel Control-flow Enforcement Technology (CET) provides new instructions `endbr32/64` for the indirect branch control. They are NOPs on i686 and new targets. We need to check for that in case it crashes on older targets. 
Fixes #58737 Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D137317 --- clang/lib/Basic/Targets/X86.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 71ab946018584..d4e6097f152fd 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -241,12 +241,16 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const override { - return true; + if (CPU == llvm::X86::CK_None || CPU >= llvm::X86::CK_PentiumPro) + return true; + return TargetInfo::checkCFProtectionReturnSupported(Diags); }; bool checkCFProtectionBranchSupported(DiagnosticsEngine &Diags) const override { - return true; + if (CPU == llvm::X86::CK_None || CPU >= llvm::X86::CK_PentiumPro) + return true; + return TargetInfo::checkCFProtectionBranchSupported(Diags); }; virtual bool validateOperandSize(const llvm::StringMap &FeatureMap, From 3cbf90468aecce960887e680f813cbb1209b337f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 3 Nov 2022 22:21:43 -0700 Subject: [PATCH 213/516] [X86][test] Add -fcf-protection test for pre-pentiumpro For #58737 --- clang/test/CodeGen/X86/x86-cf-protection.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/test/CodeGen/X86/x86-cf-protection.c b/clang/test/CodeGen/X86/x86-cf-protection.c index de6906ec51812..9f0cafc2eb456 100644 --- a/clang/test/CodeGen/X86/x86-cf-protection.c +++ b/clang/test/CodeGen/X86/x86-cf-protection.c @@ -4,6 +4,7 @@ // RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -mibt-seal -flto %s | FileCheck %s --check-prefixes=CFPROT,IBTSEAL // RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -flto %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL // RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -mibt-seal %s | FileCheck 
%s --check-prefixes=CFPROT,NOIBTSEAL +// RUN: not %clang_cc1 -emit-llvm-only -triple i386 -target-cpu pentium-mmx -fcf-protection=branch %s 2>&1 | FileCheck %s --check-prefix=NOCFPROT // RETURN: #define __CET__ 2 // BRANCH: #define __CET__ 1 @@ -11,4 +12,7 @@ // CFPROT: !{i32 8, !"cf-protection-branch", i32 1} // IBTSEAL: !{i32 8, !"ibt-seal", i32 1} // NOIBTSEAL-NOT: "ibt-seal", i32 1 + +// NOCFPROT: error: option 'cf-protection=branch' cannot be specified on this target + void foo() {} From e604f88304e183d3ce46cea5ba6bfba2fe9fba36 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 3 Nov 2022 22:54:44 -0700 Subject: [PATCH 214/516] [X86][test] Change some CodeGen tests to use %clang_cc1 --- clang/test/CodeGen/X86/indirect-branch-cs-prefix.c | 2 +- clang/test/CodeGen/X86/mmx-inline-asm.c | 3 +-- clang/test/CodeGen/X86/mmx-shift-with-immediate.c | 2 +- clang/test/CodeGen/X86/x86-cf-protection.c | 12 ++++++------ 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c b/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c index 369db26677b4d..67d2a69bc246c 100644 --- a/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c +++ b/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c @@ -1,4 +1,4 @@ -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -mindirect-branch-cs-prefix %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -triple i386 -o - -mindirect-branch-cs-prefix %s | FileCheck %s // CHECK: !{i32 4, !"indirect_branch_cs_prefix", i32 1} void foo() {} diff --git a/clang/test/CodeGen/X86/mmx-inline-asm.c b/clang/test/CodeGen/X86/mmx-inline-asm.c index 635e2a6b71efc..19c24a3a91e14 100644 --- a/clang/test/CodeGen/X86/mmx-inline-asm.c +++ b/clang/test/CodeGen/X86/mmx-inline-asm.c @@ -1,5 +1,4 @@ -// RUN: %clang -mmmx -target i386-unknown-unknown -emit-llvm -S %s -o - | FileCheck %s -// +// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +mmx %s -o - | FileCheck %s #include // CHECK: { x86_mmx, 
x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx } diff --git a/clang/test/CodeGen/X86/mmx-shift-with-immediate.c b/clang/test/CodeGen/X86/mmx-shift-with-immediate.c index ecd1881c4875c..83be6b5517c01 100644 --- a/clang/test/CodeGen/X86/mmx-shift-with-immediate.c +++ b/clang/test/CodeGen/X86/mmx-shift-with-immediate.c @@ -1,4 +1,4 @@ -// RUN: %clang -mmmx -target i386-unknown-unknown -emit-llvm -S %s -o - | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +mmx %s -o - | FileCheck %s #include void shift(__m64 a, __m64 b, int c) { diff --git a/clang/test/CodeGen/X86/x86-cf-protection.c b/clang/test/CodeGen/X86/x86-cf-protection.c index 9f0cafc2eb456..359bad714493b 100644 --- a/clang/test/CodeGen/X86/x86-cf-protection.c +++ b/clang/test/CodeGen/X86/x86-cf-protection.c @@ -1,9 +1,9 @@ -// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - -fcf-protection=return %s | FileCheck %s --check-prefix=RETURN -// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - -fcf-protection=branch %s | FileCheck %s --check-prefix=BRANCH -// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - -fcf-protection=full %s | FileCheck %s --check-prefix=FULL -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -mibt-seal -flto %s | FileCheck %s --check-prefixes=CFPROT,IBTSEAL -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -flto %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -mibt-seal %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL +// RUN: %clang_cc1 -E -triple i386 -dM -o - -fcf-protection=return %s | FileCheck %s --check-prefix=RETURN +// RUN: %clang_cc1 -E -triple i386 -dM -o - -fcf-protection=branch %s | FileCheck %s --check-prefix=BRANCH +// RUN: %clang_cc1 -E -triple i386 -dM -o - -fcf-protection=full %s | FileCheck %s --check-prefix=FULL +// RUN: %clang_cc1 -emit-llvm -triple 
i386 -o - -fcf-protection=branch -mibt-seal -flto %s | FileCheck %s --check-prefixes=CFPROT,IBTSEAL +// RUN: %clang_cc1 -emit-llvm -triple i386 -o - -fcf-protection=branch -flto %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL +// RUN: %clang_cc1 -emit-llvm -triple i386 -o - -fcf-protection=branch -mibt-seal %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL // RUN: not %clang_cc1 -emit-llvm-only -triple i386 -target-cpu pentium-mmx -fcf-protection=branch %s 2>&1 | FileCheck %s --check-prefix=NOCFPROT // RETURN: #define __CET__ 2 From cb30072a7211bc5b94d8d3d8d2e27a42a33d7a39 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 3 Nov 2022 23:10:19 -0700 Subject: [PATCH 215/516] [ELF] Fix duplicate work typo. NFC --- lld/ELF/AArch64ErrataFix.cpp | 2 +- lld/ELF/SyntheticSections.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 41a29a7328c28..2d10bedfcb2eb 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -55,7 +55,7 @@ static bool isADRP(uint32_t instr) { return (instr & 0x9f000000) == 0x90000000; } -// Load and store bit patterns from ARMv8-A ARM ARM. +// Load and store bit patterns from ARMv8-A. // Instructions appear in order of appearance starting from table in // C4.1.3 Loads and Stores. diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 12cfc97bf5848..cb98a2daf9428 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3651,7 +3651,7 @@ size_t PPC64LongBranchTargetSection::getSize() const { void PPC64LongBranchTargetSection::writeTo(uint8_t *buf) { // If linking non-pic we have the final addresses of the targets and they get // written to the table directly. For pic the dynamic linker will allocate - // the section and fill it it. + // the section and fill it. 
if (config->isPic) return; From 902ba8b0c9b013043aa04dc548be3ec907ef5571 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 31 Oct 2022 22:14:18 -0700 Subject: [PATCH 216/516] [lldb/Interpreter] Open saved transcript in GUI Editor This patch will automatically open LLDB's saved transcript file on the graphical editor if lldb is running under an interactive graphical session. This can be controlled by a new setting: `interpreter.open-transcript-in-editor` rdar://92692106 Differential Revision: https://reviews.llvm.org/D137137 Signed-off-by: Med Ismail Bennani --- .../lldb/Interpreter/CommandInterpreter.h | 3 +++ lldb/source/Interpreter/CommandInterpreter.cpp | 18 ++++++++++++++++++ .../Interpreter/InterpreterProperties.td | 4 ++++ 3 files changed, 25 insertions(+) diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h index 255f50099ebb9..a72800b5409ca 100644 --- a/lldb/include/lldb/Interpreter/CommandInterpreter.h +++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h @@ -559,6 +559,9 @@ class CommandInterpreter : public Broadcaster, bool GetSaveSessionOnQuit() const; void SetSaveSessionOnQuit(bool enable); + bool GetOpenTranscriptInEditor() const; + void SetOpenTranscriptInEditor(bool enable); + FileSpec GetSaveSessionDirectory() const; void SetSaveSessionDirectory(llvm::StringRef path); diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index eaad0195c1b74..3d0b61fa7d3c3 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -170,6 +170,17 @@ void CommandInterpreter::SetSaveSessionOnQuit(bool enable) { m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, enable); } +bool CommandInterpreter::GetOpenTranscriptInEditor() const { + const uint32_t idx = ePropertyOpenTranscriptInEditor; + return m_collection_sp->GetPropertyAtIndexAsBoolean( + nullptr, idx, 
g_interpreter_properties[idx].default_uint_value != 0); +} + +void CommandInterpreter::SetOpenTranscriptInEditor(bool enable) { + const uint32_t idx = ePropertyOpenTranscriptInEditor; + m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, enable); +} + FileSpec CommandInterpreter::GetSaveSessionDirectory() const { const uint32_t idx = ePropertySaveSessionDirectory; return m_collection_sp->GetPropertyAtIndexAsFileSpec(nullptr, idx); @@ -3226,6 +3237,13 @@ bool CommandInterpreter::SaveTranscript( result.AppendMessageWithFormat("Session's transcripts saved to %s\n", output_file->c_str()); + if (GetOpenTranscriptInEditor() && Host::IsInteractiveGraphicSession()) { + const FileSpec file_spec; + error = file->GetFileSpec(const_cast<FileSpec &>(file_spec)); + if (error.Success()) + Host::OpenFileInExternalEditor(file_spec, 1); + } + return true; } diff --git a/lldb/source/Interpreter/InterpreterProperties.td b/lldb/source/Interpreter/InterpreterProperties.td index c0acc044fb7fe..2155ee61ccffb 100644 --- a/lldb/source/Interpreter/InterpreterProperties.td +++ b/lldb/source/Interpreter/InterpreterProperties.td @@ -13,6 +13,10 @@ let Definition = "interpreter" in { Global, DefaultFalse, Desc<"If true, LLDB will save the session's transcripts before quitting.">; + def OpenTranscriptInEditor: Property<"open-transcript-in-editor", "Boolean">, + Global, + DefaultTrue, + Desc<"If true, LLDB will open the saved session's transcripts in the external editor.">; def SaveSessionDirectory: Property<"save-session-directory", "FileSpec">, DefaultStringValue<"">, Desc<"A path where LLDB will save the session's transcripts. This is particularly useful when you can't set the session file, for example when using `save-session-on-quit`.">; From bf1e235695a7acdc3e868217e69d5b31ada06cb3 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Sun, 23 Oct 2022 17:32:58 +0200 Subject: [PATCH 217/516] Implement CWG2631 Implement https://cplusplus.github.io/CWG/issues/2631.html.
Immediate calls in default arguments and default member initializers are not evaluated. Instead, we evaluate them when constructing a `CXXDefaultArgExpr`/`BuildCXXDefaultInitExpr`. The immediate calls are executed by doing a transform on the initializing expression. Note that lambdas are not considered subexpressions so we do not need to transform them. As a result of this patch, unused default member initializers are not considered odr-used, and errors about members binding to local variables in an outer scope only surface at the point where a constructor is defined. Reviewed By: aaron.ballman, #clang-language-wg Differential Revision: https://reviews.llvm.org/D136554 --- clang/docs/ReleaseNotes.rst | 5 + clang/include/clang/AST/ExprCXX.h | 108 ++++++-- clang/include/clang/AST/Stmt.h | 7 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Sema/Sema.h | 87 +++++- clang/lib/AST/ASTImporter.cpp | 21 +- clang/lib/AST/ExprCXX.cpp | 68 ++++- clang/lib/Parse/ParseCXXInlineMethods.cpp | 5 + clang/lib/Parse/ParseDeclCXX.cpp | 6 +- clang/lib/Sema/SemaDeclCXX.cpp | 90 ++---- clang/lib/Sema/SemaExpr.cpp | 256 ++++++++++++++++-- clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 +- clang/lib/Sema/TreeTransform.h | 21 +- clang/lib/Serialization/ASTReaderStmt.cpp | 12 +- clang/lib/Serialization/ASTWriterStmt.cpp | 6 + clang/test/CXX/class/class.local/p1-0x.cpp | 4 +- .../default-arguments-with-immediate.cpp | 54 ++++ .../default-argument-with-immediate-calls.cpp | 34 +++ .../cxx2a-consteval-default-params.cpp | 68 +++++ clang/test/SemaCXX/source_location.cpp | 64 ++++- 20 files changed, 784 insertions(+), 144 deletions(-) create mode 100644 clang/test/CodeGenCXX/default-arguments-with-immediate.cpp create mode 100644 clang/test/PCH/default-argument-with-immediate-calls.cpp create mode 100644 clang/test/SemaCXX/cxx2a-consteval-default-params.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ad1a00b4bbcc4..bd2d811b30668 100644 ---
a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -578,6 +578,11 @@ C++ Language Changes in Clang conforming GNU extensions. Projects incompatible with C++17 can add ``-std=gnu++14`` to their build settings to restore the previous behaviour. - Implemented DR2358 allowing init captures in lambdas in default arguments. +- Implemented DR2631. Invalid ``consteval`` calls in default arguments and default + member initializers are diagnosed when and if the default is used. + This Fixes `Issue 56379 `_ + and changes the value of ``std::source_location::current()`` + used in default parameters calls compared to previous versions of Clang. C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 0b927c0294752..098720d9469f0 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -1245,8 +1245,12 @@ class CXXThrowExpr : public Expr { /// This wraps up a function call argument that was created from the /// corresponding parameter's default argument, when the call did not /// explicitly supply arguments for all of the parameters. -class CXXDefaultArgExpr final : public Expr { +class CXXDefaultArgExpr final + : public Expr, + private llvm::TrailingObjects { friend class ASTStmtReader; + friend class ASTReader; + friend TrailingObjects; /// The parameter whose default is being used. ParmVarDecl *Param; @@ -1255,7 +1259,7 @@ class CXXDefaultArgExpr final : public Expr { DeclContext *UsedContext; CXXDefaultArgExpr(StmtClass SC, SourceLocation Loc, ParmVarDecl *Param, - DeclContext *UsedContext) + Expr *RewrittenExpr, DeclContext *UsedContext) : Expr(SC, Param->hasUnparsedDefaultArg() ? 
Param->getType().getNonReferenceType() @@ -1264,28 +1268,58 @@ class CXXDefaultArgExpr final : public Expr { Param->getDefaultArg()->getObjectKind()), Param(Param), UsedContext(UsedContext) { CXXDefaultArgExprBits.Loc = Loc; + CXXDefaultArgExprBits.HasRewrittenInit = RewrittenExpr != nullptr; + if (RewrittenExpr) + *getTrailingObjects() = RewrittenExpr; setDependence(computeDependence(this)); } + CXXDefaultArgExpr(EmptyShell Empty, bool HasRewrittenInit) + : Expr(CXXDefaultArgExprClass, Empty) { + CXXDefaultArgExprBits.HasRewrittenInit = HasRewrittenInit; + } + + size_t numTrailingObjects() const { + return CXXDefaultArgExprBits.HasRewrittenInit; + } + public: - CXXDefaultArgExpr(EmptyShell Empty) : Expr(CXXDefaultArgExprClass, Empty) {} + static CXXDefaultArgExpr *CreateEmpty(const ASTContext &C, + bool HasRewrittenInit); // \p Param is the parameter whose default argument is used by this // expression. static CXXDefaultArgExpr *Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, - DeclContext *UsedContext) { - return new (C) - CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, UsedContext); - } - + ParmVarDecl *Param, Expr *RewrittenExpr, + DeclContext *UsedContext); // Retrieve the parameter that the argument was created from. const ParmVarDecl *getParam() const { return Param; } ParmVarDecl *getParam() { return Param; } - // Retrieve the actual argument to the function call. - const Expr *getExpr() const { return getParam()->getDefaultArg(); } - Expr *getExpr() { return getParam()->getDefaultArg(); } + bool hasRewrittenInit() const { + return CXXDefaultArgExprBits.HasRewrittenInit; + } + + // Retrieve the argument to the function call. + Expr *getExpr(); + const Expr *getExpr() const { + return const_cast(this)->getExpr(); + } + + Expr *getRewrittenExpr() { + return hasRewrittenInit() ? 
*getTrailingObjects() : nullptr; + } + + const Expr *getRewrittenExpr() const { + return const_cast(this)->getRewrittenExpr(); + } + + // Retrieve the rewritten init expression (for an init expression containing + // immediate calls) with the top level FullExpr and ConstantExpr stripped off. + Expr *getAdjustedRewrittenExpr(); + const Expr *getAdjustedRewrittenExpr() const { + return const_cast(this)->getAdjustedRewrittenExpr(); + } const DeclContext *getUsedContext() const { return UsedContext; } DeclContext *getUsedContext() { return UsedContext; } @@ -1322,10 +1356,13 @@ class CXXDefaultArgExpr final : public Expr { /// is implicitly used in a mem-initializer-list in a constructor /// (C++11 [class.base.init]p8) or in aggregate initialization /// (C++1y [dcl.init.aggr]p7). -class CXXDefaultInitExpr : public Expr { - friend class ASTReader; - friend class ASTStmtReader; +class CXXDefaultInitExpr final + : public Expr, + private llvm::TrailingObjects { + friend class ASTStmtReader; + friend class ASTReader; + friend TrailingObjects; /// The field whose default is being used. FieldDecl *Field; @@ -1333,16 +1370,29 @@ class CXXDefaultInitExpr : public Expr { DeclContext *UsedContext; CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, QualType Ty, DeclContext *UsedContext); + FieldDecl *Field, QualType Ty, DeclContext *UsedContext, + Expr *RewrittenInitExpr); + + CXXDefaultInitExpr(EmptyShell Empty, bool HasRewrittenInit) + : Expr(CXXDefaultInitExprClass, Empty) { + CXXDefaultInitExprBits.HasRewrittenInit = HasRewrittenInit; + } - CXXDefaultInitExpr(EmptyShell Empty) : Expr(CXXDefaultInitExprClass, Empty) {} + size_t numTrailingObjects() const { + return CXXDefaultInitExprBits.HasRewrittenInit; + } public: + static CXXDefaultInitExpr *CreateEmpty(const ASTContext &C, + bool HasRewrittenInit); /// \p Field is the non-static data member whose default initializer is used /// by this expression. 
static CXXDefaultInitExpr *Create(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, DeclContext *UsedContext) { - return new (Ctx) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), UsedContext); + FieldDecl *Field, DeclContext *UsedContext, + Expr *RewrittenInitExpr); + + bool hasRewrittenInit() const { + return CXXDefaultInitExprBits.HasRewrittenInit; } /// Get the field whose initializer will be used. @@ -1350,13 +1400,23 @@ class CXXDefaultInitExpr : public Expr { const FieldDecl *getField() const { return Field; } /// Get the initialization expression that will be used. + Expr *getExpr(); const Expr *getExpr() const { - assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); - return Field->getInClassInitializer(); + return const_cast(this)->getExpr(); + } + + /// Retrieve the initializing expression with evaluated immediate calls, if + /// any. + const Expr *getRewrittenExpr() const { + assert(hasRewrittenInit() && "expected a rewritten init expression"); + return *getTrailingObjects(); } - Expr *getExpr() { - assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); - return Field->getInClassInitializer(); + + /// Retrieve the initializing expression with evaluated immediate calls, if + /// any. + Expr *getRewrittenExpr() { + assert(hasRewrittenInit() && "expected a rewritten init expression"); + return *getTrailingObjects(); } const DeclContext *getUsedContext() const { return UsedContext; } diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index 49a66a1ea5b86..a894111be896a 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -686,6 +686,9 @@ class alignas(void *) Stmt { unsigned : NumExprBits; + /// Whether this CXXDefaultArgExpr rewrote its argument and stores a copy. + unsigned HasRewrittenInit : 1; + /// The location where the default argument expression was used. 
SourceLocation Loc; }; @@ -696,6 +699,10 @@ class alignas(void *) Stmt { unsigned : NumExprBits; + /// Whether this CXXDefaultInitExprBitfields rewrote its argument and stores + /// a copy. + unsigned HasRewrittenInit : 1; + /// The location where the default initializer expression was used. SourceLocation Loc; }; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 1b1db765fa7a9..a720da687550a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2644,6 +2644,10 @@ def err_invalid_consteval_take_address : Error< " of an immediate invocation">; def err_invalid_consteval_call : Error< "call to consteval function %q0 is not a constant expression">; +def note_invalid_consteval_initializer : Note< + "in the default initalizer of %0">; +def note_invalid_consteval_initializer_here : Note< + "initialized here %0">; def err_invalid_consteval_decl_kind : Error< "%0 cannot be declared consteval">; def err_invalid_constexpr : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e8c9cb966bae7..f5151ac7f4c3e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1330,6 +1330,25 @@ class Sema final { bool InDiscardedStatement; bool InImmediateFunctionContext; + bool IsCurrentlyCheckingDefaultArgumentOrInitializer = false; + + // When evaluating immediate functions in the initializer of a default + // argument or default member initializer, this is the declaration whose + // default initializer is being evaluated and the location of the call + // or constructor definition. 
+ struct InitializationContext { + InitializationContext(SourceLocation Loc, ValueDecl *Decl, + DeclContext *Context) + : Loc(Loc), Decl(Decl), Context(Context) { + assert(Decl && Context && "invalid initialization context"); + }; + + SourceLocation Loc; + ValueDecl *Decl = nullptr; + DeclContext *Context = nullptr; + }; + llvm::Optional DelayedDefaultInitializationContext; + ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context, unsigned NumCleanupObjects, CleanupInfo ParentCleanup, @@ -6196,19 +6215,22 @@ class Sema final { bool IsStdInitListInitialization, bool RequiresZeroInit, unsigned ConstructKind, SourceRange ParenRange); + ExprResult ConvertMemberDefaultInitExpression(FieldDecl *FD, Expr *InitExpr, + SourceLocation InitLoc); + ExprResult BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field); /// Instantiate or parse a C++ default argument expression as necessary. /// Return true on error. bool CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param); + ParmVarDecl *Param, Expr *Init = nullptr, + bool SkipImmediateInvocations = true); /// BuildCXXDefaultArgExpr - Creates a CXXDefaultArgExpr, instantiating /// the default expr if needed. - ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, - FunctionDecl *FD, - ParmVarDecl *Param); + ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, + ParmVarDecl *Param, Expr *Init = nullptr); /// FinalizeVarWithDestructor - Prepare for calling destructor on the /// constructed variable. 
@@ -9612,6 +9634,63 @@ class Sema final { return ExprEvalContexts.back().isImmediateFunctionContext(); } + bool isCheckingDefaultArgumentOrInitializer() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + const ExpressionEvaluationContextRecord &Ctx = ExprEvalContexts.back(); + return (Ctx.Context == + ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || + Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer; + } + + bool isCheckingDefaultArgumentOrInitializerOfOuterEntity() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { + if ((Ctx.Context == + ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || + Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer) + return true; + if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || + Ctx.isUnevaluated()) + return false; + } + return false; + } + + llvm::Optional + InnermostDeclarationWithDelayedImmediateInvocations() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { + if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && + Ctx.DelayedDefaultInitializationContext) + return Ctx.DelayedDefaultInitializationContext; + if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || + Ctx.isUnevaluated()) + break; + } + return llvm::None; + } + + llvm::Optional + OutermostDeclarationWithDelayedImmediateInvocations() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + llvm::Optional + Res; + for (auto &Ctx : llvm::reverse(ExprEvalContexts)) { + if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && + !Ctx.DelayedDefaultInitializationContext && Res) + break; + if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || + Ctx.isUnevaluated()) + break; + Res = 
Ctx.DelayedDefaultInitializationContext; + } + return Res; + } + /// RAII class used to determine whether SFINAE has /// trapped any errors that occur during template argument /// deduction. diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 631dfaebabbd6..188c0f593a9ef 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -7687,9 +7687,16 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (Error Err = ImportDefaultArgOfParmVarDecl(*FromParam, ToParam)) return std::move(Err); } - + Expr *RewrittenInit = nullptr; + if (E->hasRewrittenInit()) { + ExpectedExpr ExprOrErr = import(E->getExpr()); + if (!ExprOrErr) + return ExprOrErr.takeError(); + RewrittenInit = ExprOrErr.get(); + } return CXXDefaultArgExpr::Create(Importer.getToContext(), *ToUsedLocOrErr, - *ToParamOrErr, *UsedContextOrErr); + *ToParamOrErr, RewrittenInit, + *UsedContextOrErr); } ExpectedStmt @@ -8381,8 +8388,16 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { ToField->setInClassInitializer(*ToInClassInitializerOrErr); } + Expr *RewrittenInit = nullptr; + if (E->hasRewrittenInit()) { + ExpectedExpr ExprOrErr = import(E->getExpr()); + if (!ExprOrErr) + return ExprOrErr.takeError(); + RewrittenInit = ExprOrErr.get(); + } + return CXXDefaultInitExpr::Create(Importer.getToContext(), *ToBeginLocOrErr, - ToField, *UsedContextOrErr); + ToField, *UsedContextOrErr, RewrittenInit); } ExpectedStmt ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) { diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 3bf3eab72846c..6a6f692dec787 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -949,9 +949,43 @@ const IdentifierInfo *UserDefinedLiteral::getUDSuffix() const { return cast(getCalleeDecl())->getLiteralIdentifier(); } +CXXDefaultArgExpr *CXXDefaultArgExpr::CreateEmpty(const ASTContext &C, + bool HasRewrittenInit) { + size_t Size = 
totalSizeToAlloc(HasRewrittenInit); + auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); + return new (Mem) CXXDefaultArgExpr(EmptyShell(), HasRewrittenInit); +} + +CXXDefaultArgExpr *CXXDefaultArgExpr::Create(const ASTContext &C, + SourceLocation Loc, + ParmVarDecl *Param, + Expr *RewrittenExpr, + DeclContext *UsedContext) { + size_t Size = totalSizeToAlloc(RewrittenExpr != nullptr); + auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); + return new (Mem) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, + RewrittenExpr, UsedContext); +} + +Expr *CXXDefaultArgExpr::getExpr() { + return CXXDefaultArgExprBits.HasRewrittenInit ? getAdjustedRewrittenExpr() + : getParam()->getDefaultArg(); +} + +Expr *CXXDefaultArgExpr::getAdjustedRewrittenExpr() { + assert(hasRewrittenInit() && + "expected this CXXDefaultArgExpr to have a rewritten init."); + Expr *Init = getRewrittenExpr(); + if (auto *E = dyn_cast_if_present(Init)) + if (!isa(E)) + return E->getSubExpr(); + return Init; +} + CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, FieldDecl *Field, - QualType Ty, DeclContext *UsedContext) + QualType Ty, DeclContext *UsedContext, + Expr *RewrittenInitExpr) : Expr(CXXDefaultInitExprClass, Ty.getNonLValueExprType(Ctx), Ty->isLValueReferenceType() ? VK_LValue : Ty->isRValueReferenceType() ? 
VK_XValue @@ -959,11 +993,43 @@ CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, /*FIXME*/ OK_Ordinary), Field(Field), UsedContext(UsedContext) { CXXDefaultInitExprBits.Loc = Loc; + CXXDefaultInitExprBits.HasRewrittenInit = RewrittenInitExpr != nullptr; + + if (CXXDefaultInitExprBits.HasRewrittenInit) + *getTrailingObjects() = RewrittenInitExpr; + assert(Field->hasInClassInitializer()); setDependence(computeDependence(this)); } +CXXDefaultInitExpr *CXXDefaultInitExpr::CreateEmpty(const ASTContext &C, + bool HasRewrittenInit) { + size_t Size = totalSizeToAlloc(HasRewrittenInit); + auto *Mem = C.Allocate(Size, alignof(CXXDefaultInitExpr)); + return new (Mem) CXXDefaultInitExpr(EmptyShell(), HasRewrittenInit); +} + +CXXDefaultInitExpr *CXXDefaultInitExpr::Create(const ASTContext &Ctx, + SourceLocation Loc, + FieldDecl *Field, + DeclContext *UsedContext, + Expr *RewrittenInitExpr) { + + size_t Size = totalSizeToAlloc(RewrittenInitExpr != nullptr); + auto *Mem = Ctx.Allocate(Size, alignof(CXXDefaultArgExpr)); + return new (Mem) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), + UsedContext, RewrittenInitExpr); +} + +Expr *CXXDefaultInitExpr::getExpr() { + assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); + if (hasRewrittenInit()) + return getRewrittenExpr(); + + return Field->getInClassInitializer(); +} + CXXTemporary *CXXTemporary::Create(const ASTContext &C, const CXXDestructorDecl *Destructor) { return new (C) CXXTemporary(Destructor); diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index d918ea26b9d9d..3a7f5426d4a70 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -648,6 +648,11 @@ void Parser::ParseLexedMemberInitializer(LateParsedMemberInitializer &MI) { Actions.ActOnStartCXXInClassMemberInitializer(); + // The initializer isn't actually potentially evaluated unless it is + // used. 
+ EnterExpressionEvaluationContext Eval( + Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed); + ExprResult Init = ParseCXXMemberInitializer(MI.Field, /*IsFunction=*/false, EqualLoc); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index bbffff5394f04..a2f07ea5d59fc 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -3184,7 +3184,11 @@ ExprResult Parser::ParseCXXMemberInitializer(Decl *D, bool IsFunction, "Data member initializer not starting with '=' or '{'"); EnterExpressionEvaluationContext Context( - Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, D); + Actions, + isa_and_present(D) + ? Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed + : Sema::ExpressionEvaluationContext::PotentiallyEvaluated, + D); if (TryConsumeToken(tok::equal, EqualLoc)) { if (Tok.is(tok::kw_delete)) { // In principle, an initializer of '= delete p;' is legal, but it will diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index ea7997b347959..c78ce37f372a0 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -4039,6 +4039,21 @@ ExprResult Sema::ActOnRequiresClause(ExprResult ConstraintExpr) { return ConstraintExpr; } +ExprResult Sema::ConvertMemberDefaultInitExpression(FieldDecl *FD, + Expr *InitExpr, + SourceLocation InitLoc) { + InitializedEntity Entity = + InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); + InitializationKind Kind = + FD->getInClassInitStyle() == ICIS_ListInit + ? 
InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), + InitExpr->getBeginLoc(), + InitExpr->getEndLoc()) + : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); + InitializationSequence Seq(*this, Entity, Kind, InitExpr); + return Seq.Perform(*this, Entity, Kind, InitExpr); +} + /// This is invoked after parsing an in-class initializer for a /// non-static C++ class member, and after instantiating an in-class initializer /// in a class template. Such actions are deferred until the class is complete. @@ -4067,16 +4082,7 @@ void Sema::ActOnFinishCXXInClassMemberInitializer(Decl *D, ExprResult Init = InitExpr; if (!FD->getType()->isDependentType() && !InitExpr->isTypeDependent()) { - InitializedEntity Entity = - InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); - InitializationKind Kind = - FD->getInClassInitStyle() == ICIS_ListInit - ? InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), - InitExpr->getBeginLoc(), - InitExpr->getEndLoc()) - : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); - InitializationSequence Seq(*this, Entity, Kind, InitExpr); - Init = Seq.Perform(*this, Entity, Kind, InitExpr); + Init = ConvertMemberDefaultInitExpression(FD, InitExpr, InitLoc); if (Init.isInvalid()) { FD->setInvalidDecl(); return; @@ -15617,70 +15623,6 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType, Constructor); } -ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - assert(Field->hasInClassInitializer()); - - // If we already have the in-class initializer nothing needs to be done. - if (Field->getInClassInitializer()) - return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); - - // If we might have already tried and failed to instantiate, don't try again. - if (Field->isInvalidDecl()) - return ExprError(); - - // Maybe we haven't instantiated the in-class initializer. Go check the - // pattern FieldDecl to see if it has one. 
- CXXRecordDecl *ParentRD = cast(Field->getParent()); - - if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { - CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); - DeclContext::lookup_result Lookup = - ClassPattern->lookup(Field->getDeclName()); - - FieldDecl *Pattern = nullptr; - for (auto *L : Lookup) { - if (isa(L)) { - Pattern = cast(L); - break; - } - } - assert(Pattern && "We must have set the Pattern!"); - - if (!Pattern->hasInClassInitializer() || - InstantiateInClassInitializer(Loc, Field, Pattern, - getTemplateInstantiationArgs(Field))) { - // Don't diagnose this again. - Field->setInvalidDecl(); - return ExprError(); - } - return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); - } - - // DR1351: - // If the brace-or-equal-initializer of a non-static data member - // invokes a defaulted default constructor of its class or of an - // enclosing class in a potentially evaluated subexpression, the - // program is ill-formed. - // - // This resolution is unworkable: the exception specification of the - // default constructor can be needed in an unevaluated context, in - // particular, in the operand of a noexcept-expression, and we can be - // unable to compute an exception specification for an enclosed class. - // - // Any attempt to resolve the exception specification of a defaulted default - // constructor before the initializer is lexically complete will ultimately - // come here at which point we can diagnose it. - RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); - Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) - << OutermostClass << Field; - Diag(Field->getEndLoc(), - diag::note_default_member_initializer_not_yet_parsed); - // Recover by marking the field invalid, unless we're in a SFINAE context. 
- if (!isSFINAEContext()) - Field->setInvalidDecl(); - return ExprError(); -} - void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) { if (VD->isInvalidDecl()) return; // If initializing the variable failed, don't also diagnose problems with diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2493b4a76d5e1..2849bf45ead5b 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5856,8 +5856,10 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param) { + ParmVarDecl *Param, Expr *RewrittenInit, + bool SkipImmediateInvocations) { if (Param->hasUnparsedDefaultArg()) { + assert(!RewrittenInit && "Should not have a rewritten init expression yet"); // If we've already cleared out the location for the default argument, // that means we're parsing it right now. if (!UnparsedDefaultArgLocs.count(Param)) { @@ -5874,11 +5876,14 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, return true; } - if (Param->hasUninstantiatedDefaultArg() && - InstantiateDefaultArgument(CallLoc, FD, Param)) - return true; + if (Param->hasUninstantiatedDefaultArg()) { + assert(!RewrittenInit && "Should not have a rewitten init expression yet"); + if (InstantiateDefaultArgument(CallLoc, FD, Param)) + return true; + } - assert(Param->hasInit() && "default argument but no initializer?"); + Expr *Init = RewrittenInit ? RewrittenInit : Param->getInit(); + assert(Init && "default argument but no initializer?"); // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll @@ -5887,34 +5892,237 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, // bound temporaries; see the comment in PR5810. 
// We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. - if (auto Init = dyn_cast(Param->getInit())) { + if (auto *InitWithCleanup = dyn_cast(Init)) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. - Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); - + Cleanup.setExprNeedsCleanups(InitWithCleanup->cleanupsHaveSideEffects()); // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. - assert(!Init->getNumObjects() && + assert(!InitWithCleanup->getNumObjects() && "default argument expression has capturing blocks?"); } - - // We already type-checked the argument, so we know it works. - // Just mark all of the declarations in this potentially-evaluated expression - // as being "referenced". EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); - MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), - /*SkipLocalVariables=*/true); + ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer = + SkipImmediateInvocations; + MarkDeclarationsReferencedInExpr(Init, /*SkipLocalVariables*/ true); return false; } +struct ImmediateCallVisitor : public RecursiveASTVisitor { + bool HasImmediateCalls = false; + + bool VisitCallExpr(CallExpr *E) { + if (const FunctionDecl *FD = E->getDirectCallee()) + HasImmediateCalls |= FD->isConsteval(); + return RecursiveASTVisitor::VisitStmt(E); + } + + // SourceLocExpr are not immediate invocations + // but CXXDefaultInitExpr/CXXDefaultArgExpr containing a SourceLocExpr + // need to be rebuilt so that they refer to the correct SourceLocation and + // DeclContext. 
+ bool VisitSourceLocExpr(SourceLocExpr *E) { + HasImmediateCalls = true; + return RecursiveASTVisitor::VisitStmt(E); + } + + // A nested lambda might have parameters with immediate invocations + // in their default arguments. + // The compound statement is not visited (as it does not constitute a + // subexpression). + // FIXME: We should consider visiting and transforming captures + // with init expressions. + bool VisitLambdaExpr(LambdaExpr *E) { + return VisitCXXMethodDecl(E->getCallOperator()); + } + + // Blocks don't support default parameters, and, as for lambdas, + // we don't consider their body a subexpression. + bool VisitBlockDecl(BlockDecl *B) { return false; } + + bool VisitCompoundStmt(CompoundStmt *B) { + assert("Unexpected Compound statement in default parameter or initializer"); + return false; + } + + bool VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { + return TraverseStmt(E->getExpr()); + } + + bool VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { + return TraverseStmt(E->getExpr()); + } +}; + +struct EnsureImmediateInvocationInDefaultArgs + : TreeTransform { + EnsureImmediateInvocationInDefaultArgs(Sema &SemaRef) + : TreeTransform(SemaRef) {} + + // Lambda can only have immediate invocations in the default + // args of their parameters, which is transformed upon calling the closure. + // The body is not a subexpression, so we have nothing to do. + // FIXME: Immediate calls in capture initializers should be transformed. 
+ ExprResult TransformLambdaExpr(LambdaExpr *E) { return E; } + ExprResult TransformBlockExpr(BlockExpr *E) { return E; } +}; + ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, - FunctionDecl *FD, ParmVarDecl *Param) { + FunctionDecl *FD, ParmVarDecl *Param, + Expr *Init) { assert(Param->hasDefaultArg() && "can't build nonexistent default arg"); - if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) + + bool NestedDefaultChecking = + isCheckingDefaultArgumentOrInitializerOfOuterEntity(); + + llvm::Optional + InitializationContext = + OutermostDeclarationWithDelayedImmediateInvocations(); + if (!InitializationContext.has_value()) + InitializationContext.emplace(CallLoc, Param, CurContext); + + if (!Init && !Param->hasUnparsedDefaultArg()) { + // Mark that we are replacing a default argument first. + // If we are instantiating a template we won't have to + // retransform immediate calls. + EnterExpressionEvaluationContext EvalContext( + *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); + ExprEvalContexts.back().DelayedDefaultInitializationContext = { + CallLoc, Param, CurContext}; + + if (Param->hasUninstantiatedDefaultArg()) { + if (InstantiateDefaultArgument(CallLoc, FD, Param)) + return ExprError(); + } else { + // CWG2631 + // An immediate invocation that is not evaluated where it appears is + // evaluated and checked for whether it is a constant expression at the + // point where the enclosing initializer is used in a function call. 
+ ImmediateCallVisitor V; + if (!NestedDefaultChecking) + V.TraverseDecl(Param); + if (V.HasImmediateCalls) { + EnsureImmediateInvocationInDefaultArgs Immediate(*this); + ExprResult Res = Immediate.TransformExpr(Param->getInit()); + if (Res.isInvalid()) + return ExprError(); + Res = ConvertParamDefaultArgument(Param, Res.get(), + Res.get()->getBeginLoc()); + if (Res.isInvalid()) + return ExprError(); + Init = Res.get(); + } + } + } + + if (CheckCXXDefaultArgExpr( + CallLoc, FD, Param, Init, + /*SkipImmediateInvocations=*/NestedDefaultChecking)) return ExprError(); - return CXXDefaultArgExpr::Create(Context, CallLoc, Param, CurContext); + + return CXXDefaultArgExpr::Create(Context, InitializationContext->Loc, Param, + Init, InitializationContext->Context); +} + +ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { + assert(Field->hasInClassInitializer()); + + // If we might have already tried and failed to instantiate, don't try again. + if (Field->isInvalidDecl()) + return ExprError(); + + auto *ParentRD = cast(Field->getParent()); + + llvm::Optional + InitializationContext = + OutermostDeclarationWithDelayedImmediateInvocations(); + if (!InitializationContext.has_value()) + InitializationContext.emplace(Loc, Field, CurContext); + + Expr *Init = nullptr; + + bool NestedDefaultChecking = + isCheckingDefaultArgumentOrInitializerOfOuterEntity(); + + if (!Field->getInClassInitializer()) { + // Maybe we haven't instantiated the in-class initializer. Go check the + // pattern FieldDecl to see if it has one. 
+ if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { + CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); + DeclContext::lookup_result Lookup = + ClassPattern->lookup(Field->getDeclName()); + + FieldDecl *Pattern = nullptr; + for (auto *L : Lookup) { + if ((Pattern = dyn_cast(L))) + break; + } + assert(Pattern && "We must have set the Pattern!"); + if (!Pattern->hasInClassInitializer() || + InstantiateInClassInitializer(Loc, Field, Pattern, + getTemplateInstantiationArgs(Field))) { + Field->setInvalidDecl(); + return ExprError(); + } + } + } else { + // CWG2631 + // An immediate invocation that is not evaluated where it appears is + // evaluated and checked for whether it is a constant expression at the + // point where the enclosing initializer is used in a [...] a constructor + // definition, or an aggregate initialization. + EnterExpressionEvaluationContext EvalContext( + *this, ExpressionEvaluationContext::PotentiallyEvaluated, Field); + ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field, + CurContext}; + + ImmediateCallVisitor V; + if (!NestedDefaultChecking) + V.TraverseDecl(Field); + if (V.HasImmediateCalls) { + EnsureImmediateInvocationInDefaultArgs Immediate(*this); + ExprResult Res = Immediate.TransformExpr(Field->getInClassInitializer()); + if (!Res.isInvalid()) + Res = ConvertMemberDefaultInitExpression(Field, Res.get(), Loc); + if (Res.isInvalid()) { + Field->setInvalidDecl(); + return ExprError(); + } + Init = Res.get(); + } else if (!NestedDefaultChecking) { + MarkDeclarationsReferencedInExpr(Field->getInClassInitializer()); + } + } + if (Field->getInClassInitializer()) + return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc, + Field, InitializationContext->Context, + Init); + + // DR1351: + // If the brace-or-equal-initializer of a non-static data member + // invokes a defaulted default constructor of its class or of an + // enclosing class in a potentially 
evaluated subexpression, the + // program is ill-formed. + // + // This resolution is unworkable: the exception specification of the + // default constructor can be needed in an unevaluated context, in + // particular, in the operand of a noexcept-expression, and we can be + // unable to compute an exception specification for an enclosed class. + // + // Any attempt to resolve the exception specification of a defaulted default + // constructor before the initializer is lexically complete will ultimately + // come here at which point we can diagnose it. + RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); + Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) + << OutermostClass << Field; + Diag(Field->getEndLoc(), + diag::note_default_member_initializer_not_yet_parsed); + // Recover by marking the field invalid, unless we're in a SFINAE context. + if (!isSFINAEContext()) + Field->setInvalidDecl(); + return ExprError(); } Sema::VariadicCallType @@ -17539,6 +17747,7 @@ void Sema::CheckUnusedVolatileAssignment(Expr *E) { ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) { if (isUnevaluatedContext() || !E.isUsable() || !Decl || !Decl->isConsteval() || isConstantEvaluated() || + isCheckingDefaultArgumentOrInitializer() || RebuildingImmediateInvocation || isImmediateFunctionContext()) return E; @@ -17584,8 +17793,14 @@ static void EvaluateAndDiagnoseImmediateInvocation( FD = Call->getConstructor(); else llvm_unreachable("unhandled decl kind"); - assert(FD->isConsteval()); + assert(FD && FD->isConsteval()); SemaRef.Diag(CE->getBeginLoc(), diag::err_invalid_consteval_call) << FD; + if (auto Context = + SemaRef.InnermostDeclarationWithDelayedImmediateInvocations()) { + SemaRef.Diag(Context->Loc, diag::note_invalid_consteval_initializer) + << Context->Decl; + SemaRef.Diag(Context->Decl->getBeginLoc(), diag::note_declared_at); + } for (auto &Note : Notes) SemaRef.Diag(Note.first, Note.second); return; @@ -19731,7 
+19946,8 @@ void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { if (auto *FD = dyn_cast(E->getDecl())) if (!isUnevaluatedContext() && !isConstantEvaluated() && - !isImmediateFunctionContext() && FD->isConsteval() && + !isImmediateFunctionContext() && + !isCheckingDefaultArgumentOrInitializer() && FD->isConsteval() && !RebuildingImmediateInvocation && !FD->isDependentContext()) ExprEvalContexts.back().ReferenceToConsteval.insert(E); MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse, diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 9e41dfbfdbe95..709162e01809b 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1978,9 +1978,9 @@ ExprResult TemplateInstantiator::TransformCXXDefaultArgExpr( assert(!cast(E->getParam()->getDeclContext())-> getDescribedFunctionTemplate() && "Default arg expressions are never formed in dependent cases."); - return SemaRef.BuildCXXDefaultArgExpr(E->getUsedLocation(), - cast(E->getParam()->getDeclContext()), - E->getParam()); + return SemaRef.BuildCXXDefaultArgExpr( + E->getUsedLocation(), cast(E->getParam()->getDeclContext()), + E->getParam()); } template @@ -3407,6 +3407,8 @@ bool Sema::InstantiateInClassInitializer( ContextRAII SavedContext(*this, Instantiation->getParent()); EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + ExprEvalContexts.back().DelayedDefaultInitializationContext = { + PointOfInstantiation, Instantiation, CurContext}; LocalInstantiationScope Scope(*this, true); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index ab34a9d611b9c..ead72463aca78 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3172,9 +3172,10 @@ class TreeTransform { /// By default, builds a new default-argument expression, which does not /// require any semantic analysis. 
Subclasses may override this routine to /// provide different behavior. - ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param) { + ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param, + Expr *RewrittenExpr) { return CXXDefaultArgExpr::Create(getSema().Context, Loc, Param, - getSema().CurContext); + RewrittenExpr, getSema().CurContext); } /// Build a new C++11 default-initialization expression. @@ -3184,8 +3185,7 @@ class TreeTransform { /// routine to provide different behavior. ExprResult RebuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - return CXXDefaultInitExpr::Create(getSema().Context, Loc, Field, - getSema().CurContext); + return getSema().BuildCXXDefaultInitExpr(Loc, Field); } /// Build a new C++ zero-initialization expression. @@ -12094,11 +12094,20 @@ TreeTransform::TransformCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (!Param) return ExprError(); + ExprResult InitRes; + if (E->hasRewrittenInit()) { + InitRes = getDerived().TransformExpr(E->getRewrittenExpr()); + if (InitRes.isInvalid()) + return ExprError(); + } + if (!getDerived().AlwaysRebuild() && Param == E->getParam() && - E->getUsedContext() == SemaRef.CurContext) + E->getUsedContext() == SemaRef.CurContext && + InitRes.get() == E->getRewrittenExpr()) return E; - return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param); + return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param, + InitRes.get()); } template diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 2a3c6e7231785..08f9f0bf50d03 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1824,6 +1824,9 @@ void ASTStmtReader::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { E->Param = readDeclAs(); E->UsedContext = readDeclAs(); E->CXXDefaultArgExprBits.Loc = readSourceLocation(); + E->CXXDefaultArgExprBits.HasRewrittenInit = Record.readInt(); + if 
(E->CXXDefaultArgExprBits.HasRewrittenInit) + *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { @@ -1831,6 +1834,9 @@ void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { E->Field = readDeclAs(); E->UsedContext = readDeclAs(); E->CXXDefaultInitExprBits.Loc = readSourceLocation(); + E->CXXDefaultInitExprBits.HasRewrittenInit = Record.readInt(); + if (E->CXXDefaultInitExprBits.HasRewrittenInit) + *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { @@ -3829,11 +3835,13 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_DEFAULT_ARG: - S = new (Context) CXXDefaultArgExpr(Empty); + S = CXXDefaultArgExpr::CreateEmpty( + Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); break; case EXPR_CXX_DEFAULT_INIT: - S = new (Context) CXXDefaultInitExpr(Empty); + S = CXXDefaultInitExpr::CreateEmpty( + Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); break; case EXPR_CXX_BIND_TEMPORARY: diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index e2ba69ca1eec8..6e4101ac122ee 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1745,6 +1745,9 @@ void ASTStmtWriter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Record.AddDeclRef(E->getParam()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); Record.AddSourceLocation(E->getUsedLocation()); + Record.push_back(E->hasRewrittenInit()); + if (E->hasRewrittenInit()) + Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_ARG; } @@ -1753,6 +1756,9 @@ void ASTStmtWriter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { Record.AddDeclRef(E->getField()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); Record.AddSourceLocation(E->getExprLoc()); + Record.push_back(E->hasRewrittenInit()); + if 
(E->hasRewrittenInit()) + Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_INIT; } diff --git a/clang/test/CXX/class/class.local/p1-0x.cpp b/clang/test/CXX/class/class.local/p1-0x.cpp index 49125f5f9b062..096f5080099ec 100644 --- a/clang/test/CXX/class/class.local/p1-0x.cpp +++ b/clang/test/CXX/class/class.local/p1-0x.cpp @@ -11,8 +11,8 @@ void f() { int x = 3; // expected-note{{'x' declared here}} struct C { int& x2 = x; // expected-error{{reference to local variable 'x' declared in enclosing lambda expression}} - }; + }c; // expected-note {{required here}} }; - C(); + C(); // expected-note {{required here}} } diff --git a/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp b/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp new file mode 100644 index 0000000000000..54a02ffc06836 --- /dev/null +++ b/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -std=c++2a -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s + +consteval int immediate() { return 0;} +static int ext(); +void f(int a = immediate() + ext()); + +void test_function() { + f(); + f(0); + // CHECK: call noundef i32 @_ZL3extv() + // CHECK: add + // CHECK: call {{.*}} @_Z1fi + // CHECK: call {{.*}} @_Z1fi +} + +// CHECK: define {{.*}} i32 @_ZL3extv() + +static constexpr int not_immediate(); +struct A { + int a = immediate() + not_immediate(); +}; + +void test_member() { + // CHECK: call void @_ZN1AC2Ev + A defaulted; + // CHECK-NOT: call void @_ZN1AC2Ev + A provided{0}; +} + +// CHECK: define {{.*}} void @_ZN1AC2Ev{{.*}} +// CHECK: %call = call noundef i32 @_ZL13not_immediatev() + +int never_referenced() {return 42;}; + + +namespace not_used { + +struct A { + int a = immediate() + never_referenced(); +}; +void f(int a = immediate() + never_referenced()); + +void g() { + A a{0}; + f(0); +} + +} + +static int ext() {return 0;} +static constexpr int not_immediate() {return 0;} + +// CHECK-NOT: define {{.*}} i32 
_ZL16never_referencedv()( +// CHECK: define {{.*}} i32 @_ZL13not_immediatev() diff --git a/clang/test/PCH/default-argument-with-immediate-calls.cpp b/clang/test/PCH/default-argument-with-immediate-calls.cpp new file mode 100644 index 0000000000000..510605a23d4e7 --- /dev/null +++ b/clang/test/PCH/default-argument-with-immediate-calls.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -std=c++20 -emit-pch %s -o %t +// RUN: %clang_cc1 -std=c++20 -include-pch %t -verify %s +// expected-no-diagnostics + +#ifndef HEADER_INCLUDED +#define HEADER_INCLUDED + +consteval int immediate(); +int regular_function() { + return 0; +} + +struct S { + int a = immediate() + regular_function(); +}; + +int f(int arg = immediate()) { + return arg; +} + +#else + +consteval int immediate() { + return 0; +} + +void test() { + f(0); + f(); + S s{0}; + S t{0}; +} + +#endif diff --git a/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp b/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp new file mode 100644 index 0000000000000..511306e0d921a --- /dev/null +++ b/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2b %s + +consteval int undefined(); // expected-note 4 {{declared here}} + +void check_lambdas_unused( + int a = [] + { + // The body of a lambda is not a subexpression of the lambda + // so this is immediately evaluated even if the parameter + // is never used. 
+ return undefined(); // expected-error {{not a constant expression}} \ + // expected-note {{undefined function 'undefined'}} + }(), + int b = [](int no_error = undefined()) { + return no_error; + }(0), + int c = [](int defaulted = undefined()) { + return defaulted; + }() +) {} + +int check_lambdas_used( + int b = [](int no_error = undefined()) { + return no_error; + }(0), + int c = [](int defaulted = undefined()) { // expected-error {{not a constant expression}} \ + // expected-note {{declared here}} \ + // expected-note {{undefined function 'undefined'}} + return defaulted; + }(), // expected-note {{in the default initalizer of 'defaulted'}} + int d = [](int defaulted = sizeof(undefined())) { + return defaulted; + }() +) { + return 0; +} + +int test_check_lambdas_used = check_lambdas_used(); + +struct UnusedInitWithLambda { + int a = [] { + return undefined(); // expected-error {{not a constant expression}} \ + // expected-note {{undefined function 'undefined'}} + }(); + // UnusedInitWithLambda is never constructed, so the initializer + // of b and undefined() are never evaluated. 
+ int b = [](int no_error = undefined()) { + return no_error; + }(); +}; + +consteval int ub(int n) { + return 0/n; // expected-note {{division}} +} + +struct InitWithLambda { + int b = [](int error = undefined()) { // expected-error {{not a constant expression}} \ + // expected-note {{declared here}} \ + // expected-note {{undefined function 'undefined'}} + return error; + }(); // expected-note {{in the default initalizer of 'error'}} + int c = [](int error = sizeof(undefined()) + ub(0)) { // expected-error {{'ub' is not a constant expression}} \ + // expected-note {{declared here}} \ + // expected-note {{in call to 'ub(0)}} + return error; + }(); // expected-note {{in the default initalizer of 'error'}} +} i; // expected-note {{in implicit default constructor}} diff --git a/clang/test/SemaCXX/source_location.cpp b/clang/test/SemaCXX/source_location.cpp index ccb385f60dc4b..9cfe9207dd14d 100644 --- a/clang/test/SemaCXX/source_location.cpp +++ b/clang/test/SemaCXX/source_location.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fexceptions -verify %s +// RUN: %clang_cc1 -std=c++2a -fcxx-exceptions -DUSE_CONSTEVAL -fexceptions -verify %s // expected-no-diagnostics #define assert(...) ((__VA_ARGS__) ? 
((void)0) : throw 42) @@ -8,15 +9,22 @@ template struct Printer; +#ifdef USE_CONSTEVAL +#define SOURCE_LOC_EVAL_KIND consteval +#else +#define SOURCE_LOC_EVAL_KIND constexpr +#endif + namespace std { class source_location { struct __impl; public: - static constexpr source_location current(const __impl *__p = __builtin_source_location()) noexcept { - source_location __loc; - __loc.__m_impl = __p; - return __loc; + static SOURCE_LOC_EVAL_KIND source_location + current(const __impl *__p = __builtin_source_location()) noexcept { + source_location __loc; + __loc.__m_impl = __p; + return __loc; } constexpr source_location() = default; constexpr source_location(source_location const &) = default; @@ -593,3 +601,51 @@ namespace TestConstexprContext { } static_assert(test()); } + +namespace Lambda { +#line 8000 "TestLambda.cpp" +constexpr int nested_lambda(int l = []{ + return SL::current().line(); +}()) { + return l; +} +static_assert(nested_lambda() == __LINE__ - 4); + +constexpr int lambda_param(int l = [](int l = SL::current().line()) { + return l; +}()) { + return l; +} +static_assert(lambda_param() == __LINE__); + + +} + +constexpr int compound_literal_fun(int a = + (int){ SL::current().line() } +) { return a ;} +static_assert(compound_literal_fun() == __LINE__); + +struct CompoundLiteral { + int a = (int){ SL::current().line() }; +}; +static_assert(CompoundLiteral{}.a == __LINE__); + + +// FIXME +// Init captures are subexpressions of the lambda expression +// so according to the standard immediate invocations in init captures +// should be evaluated at the call site. +// However Clang does not yet implement this as it would introduce +// a fair bit of complexity. +// We intend to implement that functionality once we find real world +// use cases that require it. 
+constexpr int test_init_capture(int a = + [b = SL::current().line()] { return b; }()) { + return a; +} +#ifdef USE_CONSTEVAL +static_assert(test_init_capture() == __LINE__ - 4); +#else +static_assert(test_init_capture() == __LINE__ ); +#endif From 4a6ef0078cd0ce5c6622cb141bca5cfb9bee5f22 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Nov 2022 20:55:41 -0700 Subject: [PATCH 218/516] [RISCV] Change SEXT_B/SEXT_H/ZEXT_H_RV64 handling in isAllUsesReadW in SExtWRemoval. These instructions only use the lower 8 or 16 bits. We don't need to look across them. --- llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index a31f898e104d7..bdc51326b7c8f 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -122,6 +122,9 @@ static bool isAllUsesReadW(const MachineInstr &OrigMI, case RISCV::FCVT_S_WU: case RISCV::FCVT_D_W: case RISCV::FCVT_D_WU: + case RISCV::SEXT_B: + case RISCV::SEXT_H: + case RISCV::ZEXT_H_RV64: continue; // these overwrite higher input bits, otherwise the lower word of output @@ -167,8 +170,6 @@ static bool isAllUsesReadW(const MachineInstr &OrigMI, case RISCV::CLMUL: case RISCV::ORC_B: case RISCV::ORN: - case RISCV::SEXT_B: - case RISCV::SEXT_H: case RISCV::SH1ADD: case RISCV::SH1ADD_UW: case RISCV::SH2ADD: @@ -176,7 +177,6 @@ static bool isAllUsesReadW(const MachineInstr &OrigMI, case RISCV::SH3ADD: case RISCV::SH3ADD_UW: case RISCV::XNOR: - case RISCV::ZEXT_H_RV64: addUses(*MI, Worklist, MRI); continue; default: From 2fb1324736f69391636297f973e697c41e2d7a61 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Nov 2022 20:56:47 -0700 Subject: [PATCH 219/516] [RISCV] Add FMV_H_X/FMV_W_X/FCVT_H_W/FCVT_H_WU to isAllUsesReadW in SExtWRemoval. The instructions only read the lower 16 or 32 bits of a GPR. 
--- llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index bdc51326b7c8f..14115982313a3 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -118,6 +118,10 @@ static bool isAllUsesReadW(const MachineInstr &OrigMI, case RISCV::CTZW: case RISCV::CPOPW: case RISCV::SLLI_UW: + case RISCV::FMV_H_X: + case RISCV::FMV_W_X: + case RISCV::FCVT_H_W: + case RISCV::FCVT_H_WU: case RISCV::FCVT_S_W: case RISCV::FCVT_S_WU: case RISCV::FCVT_D_W: From c9da0352a3a5480492b3c99b165240862a6eb646 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 4 Nov 2022 07:07:54 +0000 Subject: [PATCH 220/516] [libc] Convert the api-test in to an integration test. It used to pass but showed an annoying message about _start not being available. That will not happen anymore. --- libc/cmake/modules/LLVMLibCTestRules.cmake | 18 ------ libc/test/src/CMakeLists.txt | 59 +++++++------------ .../PrototypeTestGen/PrototypeTestGen.cpp | 4 +- 3 files changed, 22 insertions(+), 59 deletions(-) diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index 131bbad7c9c64..5eb3d4e67b78d 100644 --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -436,24 +436,6 @@ function(add_integration_test test_name) libc.utils.IntegrationTest.test) list(REMOVE_DUPLICATES fq_deps_list) - # We don't want memory functions to be dependencies on integration tests. - # Memory functions should be tested using unittests. The main reason - # however is that compiler codegen can emit calls to memory functions. So, - # we add them explicitly to the integration test libc.a (see below). Adding - # explicit deps on the memory functions can potentially cause duplicate - # symbol errors. 
- set(memory_funcs "bcmp;bzero;memcmp;memcpy;memmove;memset") - foreach(dep IN LISTS fq_deps_list) - get_target_property(name ${dep} ENTRYPOINT_NAME) - if(NOT name) - continue() - endif() - list(FIND memory_funcs ${name} loc) - if(${loc} GREATER_EQUAL 0) - message(FATAL_ERROR "Memory function ${name} cannot be a dependency " - "for integration tests.") - endif() - endforeach() # TODO: Instead of gathering internal object files from entrypoints, # collect the object files with public names of entrypoints. get_object_files_for_test( diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index bb5bbb1e5b34d..c4f0aab5a3808 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -88,42 +88,28 @@ add_custom_command( ${LIBC_TARGET} ) -add_executable( - libc-api-test - EXCLUDE_FROM_ALL - ${public_test} +add_custom_target(libc-api-test) +set( + allocator_entrypoints + libc.src.stdlib.malloc + libc.src.stdlib.calloc + libc.src.stdlib.realloc + libc.src.stdlib.aligned_alloc + libc.src.stdlib.free ) -# Blank out default include directories to prevent accidentally including -# system headers or our own internal headers. -set_target_properties( - libc-api-test - PROPERTIES - INCLUDE_DIRECTORIES "" +set(api-test-entrypoints ${TARGET_LLVMLIBC_ENTRYPOINTS}) +list(REMOVE_ITEM api-test-entrypoints ${allocator_entrypoints}) +add_integration_test( + api-test + SUITE + libc-api-test + SRCS + ${public_test} + LOADER + libc.loader.linux.crt1 + DEPENDS + ${api-test-entrypoints} ) -target_link_libraries(libc-api-test ${LIBC_TARGET}) - -# Only include we need is the include for cpp::IsSame and our generated -# public headers. 
-target_include_directories( - libc-api-test BEFORE - PRIVATE - "${LIBC_SOURCE_DIR}/src/__support/CPP" - "${LIBC_BUILD_DIR}/include" -) -target_compile_options( - libc-api-test - PRIVATE - -ffreestanding -) -target_link_options( - libc-api-test - PRIVATE "-nostdlib" -) -set(library_files) -foreach(library_name IN LISTS "llvmlibc") - get_target_property(library_file ${library_name} "LIBRARY_FILE") - list(APPEND library_files ${library_file}) -endforeach() if(COMPILER_RESOURCE_DIR AND LLVM_LIBC_ENABLE_LINTING) add_custom_target( @@ -143,8 +129,3 @@ if(COMPILER_RESOURCE_DIR AND LLVM_LIBC_ENABLE_LINTING) ) add_dependencies(libc-api-test libc-api-test-tidy) endif() - -target_link_libraries(libc-api-test - PRIVATE - ${library_files} -) diff --git a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp index cd1e61b2f35a1..340f79ac9d14d 100644 --- a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp +++ b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp @@ -22,7 +22,7 @@ llvm::cl::list } // anonymous namespace bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { - OS << "#include \"type_traits.h\"\n"; + OS << "#include \"src/__support/CPP/type_traits.h\"\n"; llvm_libc::APIIndexer G(records); std::unordered_set headerFileSet; for (const auto &entrypoint : EntrypointNamesOption) { @@ -45,7 +45,7 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { OS << '\n'; - OS << "int main() {\n"; + OS << "extern \"C\" int main() {\n"; for (const auto &entrypoint : EntrypointNamesOption) { auto match = G.FunctionSpecMap.find(entrypoint); if (match == G.FunctionSpecMap.end()) { From 4eea884959498b283335f18fc7899ba022bcb881 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 2 Nov 2022 22:38:50 +0000 Subject: [PATCH 221/516] [libc] Add implementation of setbuf and setvbuf. 
Reviewed By: michaelrj Differential Revision: https://reviews.llvm.org/D137356 --- libc/config/linux/x86_64/entrypoints.txt | 2 + libc/spec/stdc.td | 10 ++ libc/src/__support/File/file.cpp | 49 +++++++- libc/src/__support/File/file.h | 60 ++++++++-- libc/src/stdio/CMakeLists.txt | 26 +++++ libc/src/stdio/setbuf.cpp | 28 +++++ libc/src/stdio/setbuf.h | 20 ++++ libc/src/stdio/setvbuf.cpp | 27 +++++ libc/src/stdio/setvbuf.h | 21 ++++ libc/test/src/stdio/CMakeLists.txt | 32 ++++++ libc/test/src/stdio/setbuf_test.cpp | 68 +++++++++++ libc/test/src/stdio/setvbuf_test.cpp | 106 ++++++++++++++++++ .../PrototypeTestGen/PrototypeTestGen.cpp | 1 + 13 files changed, 435 insertions(+), 15 deletions(-) create mode 100644 libc/src/stdio/setbuf.cpp create mode 100644 libc/src/stdio/setbuf.h create mode 100644 libc/src/stdio/setvbuf.cpp create mode 100644 libc/src/stdio/setvbuf.h create mode 100644 libc/test/src/stdio/setbuf_test.cpp create mode 100644 libc/test/src/stdio/setvbuf_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 17f2c994c12fb..2c4867f48b1b9 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -393,6 +393,8 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.putc libc.src.stdio.putchar libc.src.stdio.puts + libc.src.stdio.setbuf + libc.src.stdio.setvbuf libc.src.stdio.stderr libc.src.stdio.stdin libc.src.stdio.stdout diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 69a3ac1e26d8b..64ee9b7c45399 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -613,6 +613,16 @@ def StdC : StandardSpec<"stdc"> { RetValSpec, [ArgSpec] >, + FunctionSpec< + "setbuf", + RetValSpec, + [ArgSpec, ArgSpec] + >, + FunctionSpec< + "setvbuf", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec, ArgSpec] + >, FunctionSpec< "sprintf", RetValSpec, diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index 352b1a4d24005..9129f68b521d4 100644 --- 
a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -330,12 +330,49 @@ int File::close() { return 0; } -void File::set_buffer(void *buffer, size_t size, bool owned) { - if (own_buf) - free(buf); - buf = static_cast(buffer); - bufsize = size; - own_buf = owned; +int File::set_buffer(void *buffer, size_t size, int buffer_mode) { + // We do not need to lock the file as this method should be called before + // other operations are performed on the file. + + if (buffer != nullptr && size == 0) + return EINVAL; + + switch (buffer_mode) { + case _IOFBF: + case _IOLBF: + case _IONBF: + break; + default: + return EINVAL; + } + + if (buffer == nullptr && size != 0 && buffer_mode != _IONBF) { + // We exclude the case of buffer_mode == _IONBF in this branch + // because we don't need to allocate buffer in such a case. + if (own_buf) { + buf = realloc(buf, size); + } else { + buf = malloc(size); + own_buf = true; + } + bufsize = size; + // TODO: Handle allocation failures. + } else { + if (own_buf) + free(buf); + if (buffer_mode != _IONBF) { + buf = static_cast(buffer); + bufsize = size; + } else { + // We don't need any buffer. + buf = nullptr; + bufsize = 0; + } + own_buf = false; + } + bufmode = buffer_mode; + adjust_buf(); + return 0; } File::ModeFlags File::mode_flags(const char *mode) { diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index 7ea780d94f555..e08508bcb1d83 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -71,6 +71,11 @@ class File { Mutex mutex; + // For files which are readable, we should be able to support one ungetc + // operation even if |buf| is nullptr. So, in the constructor of File, we + // set |buf| to point to this buffer character. + char ungetc_buf; + void *buf; // Pointer to the stream buffer for buffered streams size_t bufsize; // Size of the buffer pointed to by |buf|. 
@@ -111,13 +116,13 @@ class File { }; protected: - bool write_allowed() const { + constexpr bool write_allowed() const { return mode & (static_cast(OpenMode::WRITE) | static_cast(OpenMode::APPEND) | static_cast(OpenMode::PLUS)); } - bool read_allowed() const { + constexpr bool read_allowed() const { return mode & (static_cast(OpenMode::READ) | static_cast(OpenMode::PLUS)); } @@ -125,15 +130,21 @@ class File { public: // We want this constructor to be constexpr so that global file objects // like stdout do not require invocation of the constructor which can - // potentially lead to static initialization order fiasco. + // potentially lead to static initialization order fiasco. Consequently, + // we will assume that the |buffer| and |buffer_size| argument are + // meaningful - that is, |buffer| is nullptr if and only if |buffer_size| + // is zero. This way, we will not have to employ the semantics of + // the set_buffer method and allocate a buffer. constexpr File(WriteFunc *wf, ReadFunc *rf, SeekFunc *sf, CloseFunc *cf, FlushFunc *ff, void *buffer, size_t buffer_size, int buffer_mode, bool owned, ModeFlags modeflags) : platform_write(wf), platform_read(rf), platform_seek(sf), platform_close(cf), platform_flush(ff), mutex(false, false, false), - buf(buffer), bufsize(buffer_size), bufmode(buffer_mode), own_buf(owned), - mode(modeflags), pos(0), prev_op(FileOp::NONE), read_limit(0), - eof(false), err(false) {} + ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode), + own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE), + read_limit(0), eof(false), err(false) { + adjust_buf(); + } // This function helps initialize the various fields of the File data // structure after a allocating memory for it via a call to malloc. @@ -156,6 +167,8 @@ class File { f->prev_op = FileOp::NONE; f->read_limit = f->pos = 0; f->eof = f->err = false; + + f->adjust_buf(); } // Buffered write of |len| bytes from |data| without the file lock. 
@@ -196,9 +209,16 @@ class File { } // Sets the internal buffer to |buffer| with buffering mode |mode|. - // |size| is the size of |buffer|. This new |buffer| is owned by the - // stream only if |owned| is true. - void set_buffer(void *buffer, size_t size, bool owned); + // |size| is the size of |buffer|. If |size| is non-zero, but |buffer| + // is nullptr, then a buffer owned by this file will be allocated. + // Else, |buffer| will not be owned by this file. + // + // Will return zero on success, or an error value on failure. Will fail + // if: + // 1. |buffer| is not a nullptr but |size| is zero. + // 2. |buffer_mode| is not one of _IOLBF, _IOFBF or _IONBF. + // In both the above cases, the error returned is EINVAL. + int set_buffer(void *buffer, size_t size, int buffer_mode); // Closes the file stream and frees up all resources owned by it. int close(); @@ -235,6 +255,28 @@ class File { size_t write_unlocked_lbf(const uint8_t *data, size_t len); size_t write_unlocked_fbf(const uint8_t *data, size_t len); size_t write_unlocked_nbf(const uint8_t *data, size_t len); + + constexpr void adjust_buf() { + if (read_allowed() && (buf == nullptr || bufsize == 0)) { + // We should allow at least one ungetc operation. + // This might give an impression that a buffer will be used even when + // the user does not want a buffer. But, that will not be the case. + // For reading, the buffering does not come into play. For writing, let + // us take up the three different kinds of buffering separately: + // 1. If user wants _IOFBF but gives a zero buffer, buffering still + // happens in the OS layer until the user flushes. So, from the user's + // point of view, this single byte buffer does not affect their + // experience. + // 2. If user wants _IOLBF but gives a zero buffer, the reasoning is + // very similar to the _IOFBF case. + // 3. If user wants _IONBF, then the buffer is ignored for writing. 
+ // So, all of the above cases, having a single ungetc buffer does not + // affect the behavior experienced by the user. + buf = &ungetc_buf; + bufsize = 1; + own_buf = false; // We shouldn't call free on |buf| when closing the file. + } + } }; // The implementaiton of this function is provided by the platfrom_file diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index f8b197d984c52..61ca8ce34bbba 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -341,6 +341,32 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + setbuf + SRCS + setbuf.cpp + HDRS + setbuf.h + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + setvbuf + SRCS + setvbuf.cpp + HDRS + setvbuf.h + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( sprintf SRCS diff --git a/libc/src/stdio/setbuf.cpp b/libc/src/stdio/setbuf.cpp new file mode 100644 index 0000000000000..b75963239216b --- /dev/null +++ b/libc/src/stdio/setbuf.cpp @@ -0,0 +1,28 @@ +//===-- Implementation of setbuf ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/setbuf.h" +#include "src/__support/File/file.h" + +#include +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(void, setbuf, + (::FILE *__restrict stream, char *__restrict buf)) { + int mode = _IOFBF; + if (buf == nullptr) + mode = _IONBF; + int err = reinterpret_cast<__llvm_libc::File *>(stream)->set_buffer( + buf, BUFSIZ, mode); + if (err != 0) + errno = err; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/setbuf.h b/libc/src/stdio/setbuf.h new file mode 100644 index 0000000000000..7a158ac0f173e --- /dev/null +++ b/libc/src/stdio/setbuf.h @@ -0,0 +1,20 @@ +//===-- Implementation header of setbuf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SETBUF_H +#define LLVM_LIBC_SRC_STDIO_SETBUF_H + +#include + +namespace __llvm_libc { + +void setbuf(::FILE *__restrict stream, char *__restrict buf); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SETBUF_H diff --git a/libc/src/stdio/setvbuf.cpp b/libc/src/stdio/setvbuf.cpp new file mode 100644 index 0000000000000..162519fcca36b --- /dev/null +++ b/libc/src/stdio/setvbuf.cpp @@ -0,0 +1,27 @@ +//===-- Implementation of setvbuf -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/setvbuf.h" +#include "src/__support/File/file.h" + +#include +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, setvbuf, + (::FILE *__restrict stream, char *__restrict buf, int type, + size_t size)) { + int err = reinterpret_cast<__llvm_libc::File *>(stream)->set_buffer(buf, size, + type); + if (err != 0) + errno = err; + return err; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/setvbuf.h b/libc/src/stdio/setvbuf.h new file mode 100644 index 0000000000000..bceedd8b44113 --- /dev/null +++ b/libc/src/stdio/setvbuf.h @@ -0,0 +1,21 @@ +//===-- Implementation header of setvbuf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SETVBUF_H +#define LLVM_LIBC_SRC_STDIO_SETVBUF_H + +#include + +namespace __llvm_libc { + +int setvbuf(::FILE *__restrict stream, char *__restrict buf, int type, + size_t size); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SETVBUF_H diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 904c669d63da6..b453af2bb13c5 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -37,6 +37,38 @@ add_libc_unittest( libc.src.stdio.ungetc ) +add_libc_unittest( + setbuf_test + SUITE + libc_stdio_unittests + SRCS + setbuf_test.cpp + DEPENDS + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fwrite + libc.src.stdio.setbuf + libc.src.stdio.ungetc +) + +add_libc_unittest( + setvbuf_test + SUITE + libc_stdio_unittests + 
SRCS + setvbuf_test.cpp + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fwrite + libc.src.stdio.setvbuf +) + add_libc_unittest( unlocked_fileop_test SUITE diff --git a/libc/test/src/stdio/setbuf_test.cpp b/libc/test/src/stdio/setbuf_test.cpp new file mode 100644 index 0000000000000..0a53e221cf425 --- /dev/null +++ b/libc/test/src/stdio/setbuf_test.cpp @@ -0,0 +1,68 @@ +//===-- Unittests for setbuf ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/setbuf.h" +#include "src/stdio/ungetc.h" +#include "utils/UnitTest/Test.h" + +#include + +TEST(LlvmLibcSetbufTest, DefaultBufsize) { + // The idea in this test is to change the buffer after opening a file and + // ensure that read and write work as expected. 
+ constexpr char FILENAME[] = "testdata/setbuf_test_default_bufsize.test"; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(file == nullptr); + char buffer[BUFSIZ]; + __llvm_libc::setbuf(file, buffer); + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file)); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(FILENAME, "r"); + __llvm_libc::setbuf(file, buffer); + ASSERT_FALSE(file == nullptr); + char data[CONTENT_SIZE]; + ASSERT_EQ(__llvm_libc::fread(&data, 1, CONTENT_SIZE, file), CONTENT_SIZE); + ASSERT_STREQ(CONTENT, data); + ASSERT_EQ(0, __llvm_libc::fclose(file)); +} + +TEST(LlvmLibcSetbufTest, NullBuffer) { + // The idea in this test is that we set a null buffer and ensure that + // everything works correctly. + constexpr char FILENAME[] = "testdata/setbuf_test_null_buffer.test"; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(file == nullptr); + __llvm_libc::setbuf(file, nullptr); + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file)); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(FILENAME, "r"); + __llvm_libc::setbuf(file, nullptr); + ASSERT_FALSE(file == nullptr); + char data[CONTENT_SIZE]; + ASSERT_EQ(__llvm_libc::fread(&data, 1, CONTENT_SIZE, file), CONTENT_SIZE); + ASSERT_STREQ(CONTENT, data); + + // Ensure that ungetc also works. 
+ char unget_char = 'z'; + ASSERT_EQ(int(unget_char), __llvm_libc::ungetc(unget_char, file)); + char c; + ASSERT_EQ(__llvm_libc::fread(&c, 1, 1, file), size_t(1)); + ASSERT_EQ(c, unget_char); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); +} diff --git a/libc/test/src/stdio/setvbuf_test.cpp b/libc/test/src/stdio/setvbuf_test.cpp new file mode 100644 index 0000000000000..3cdcc044c38e1 --- /dev/null +++ b/libc/test/src/stdio/setvbuf_test.cpp @@ -0,0 +1,106 @@ +//===-- Unittests for setvbuf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/setvbuf.h" +#include "utils/UnitTest/Test.h" + +#include +#include + +TEST(LlvmLibcSetvbufTest, SetNBFBuffer) { + // The idea in this test is that we open a file for writing and reading, and + // then set a NBF buffer to the write handle. Since it is NBF, the data + // written using the write handle should be immediately readable by the read + // handle. 
+ constexpr char FILENAME[] = "testdata/setvbuf_nbf.test"; + + ::FILE *fw = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(fw == nullptr); + char buffer[BUFSIZ]; + ASSERT_EQ(__llvm_libc::setvbuf(fw, buffer, _IONBF, BUFSIZ), 0); + + ::FILE *fr = __llvm_libc::fopen(FILENAME, "r"); + ASSERT_FALSE(fr == nullptr); + + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + for (size_t i = 0; i < CONTENT_SIZE; ++i) { + ASSERT_EQ(size_t(1), __llvm_libc::fwrite(CONTENT + i, 1, 1, fw)); + char c; + ASSERT_EQ(size_t(1), __llvm_libc::fread(&c, 1, 1, fr)); + ASSERT_EQ(c, CONTENT[i]); + } + + ASSERT_EQ(0, __llvm_libc::fclose(fw)); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); + + // Make sure NBF buffer has no effect for reading. + fr = __llvm_libc::fopen(FILENAME, "r"); + char data[CONTENT_SIZE]; + ASSERT_EQ(__llvm_libc::setvbuf(fr, buffer, _IONBF, BUFSIZ), 0); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, fr)); + ASSERT_STREQ(CONTENT, data); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); +} + +TEST(LlvmLibcSetvbufTest, SetLBFBuffer) { + // The idea in this test is that we open a file for writing and reading, and + // then set a LBF buffer to the write handle. Since it is LBF, the data + // written using the write handle should be available right after a '\n' is + // written. + constexpr char FILENAME[] = "testdata/setvbuf_lbf.test"; + + ::FILE *fw = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(fw == nullptr); + char buffer[BUFSIZ]; + ASSERT_EQ(__llvm_libc::setvbuf(fw, buffer, _IOLBF, BUFSIZ), 0); + + ::FILE *fr = __llvm_libc::fopen(FILENAME, "r"); + ASSERT_FALSE(fr == nullptr); + + constexpr char CONTENT[] = "abcdef\n"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, fw)); + + // Note that CONTENT_SIZE worth of data written also includes the + // null-terminator '\0'. 
But, since it is after the new line character, + // it should not be available for reading. + char data[CONTENT_SIZE]; + ASSERT_EQ(CONTENT_SIZE - 1, __llvm_libc::fread(data, 1, CONTENT_SIZE, fr)); + char c; + ASSERT_EQ(size_t(0), __llvm_libc::fread(&c, 1, 1, fr)); + + data[CONTENT_SIZE - 1] = '\0'; + ASSERT_STREQ(CONTENT, data); + + ASSERT_EQ(0, __llvm_libc::fclose(fw)); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); + + // Make sure LBF buffer has no effect for reading. + fr = __llvm_libc::fopen(FILENAME, "r"); + ASSERT_EQ(__llvm_libc::setvbuf(fr, buffer, _IOLBF, BUFSIZ), 0); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, fr)); + ASSERT_STREQ(CONTENT, data); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); +} + +TEST(LlvmLibcSetbufTest, InvalidBufferMode) { + constexpr char FILENAME[] = "testdata/setvbuf_invalid_bufmode.test"; + ::FILE *f = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(f == nullptr); + char buf[BUFSIZ]; + ASSERT_NE(__llvm_libc::setvbuf(f, buf, _IOFBF + _IOLBF + _IONBF, BUFSIZ), 0); + ASSERT_EQ(errno, EINVAL); + + errno = 0; + ASSERT_EQ(0, __llvm_libc::fclose(f)); +} diff --git a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp index 340f79ac9d14d..06f621052f154 100644 --- a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp +++ b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp @@ -94,6 +94,7 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { // We provide dummy malloc and free implementations to support the case // when LLVM libc does to include them.
OS << "void *malloc(size_t) { return nullptr; }\n"; + OS << "void *realloc(void *, size_t) { return nullptr; }\n"; OS << "void free(void *) {}\n"; return false; From de1aced75942eacd59ba46b131311a6bbbe4d0f2 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Fri, 4 Nov 2022 08:18:47 +0100 Subject: [PATCH 222/516] [flang] Handle non derived-type unlimited polymorphic allocation Runtime call to PointerNullifyDerived or AllocatableInitDerived should only be generated for derived-type allocation of polymorphic entities. With unlimited polymorphic entities, it is possible that the type spec is not a derived-type. Avoid failure in that case. Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D137353 --- flang/lib/Lower/Allocatable.cpp | 4 ++++ flang/test/Lower/polymorphic.f90 | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 190fe1a698f3c..65f7e9c75b53b 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -482,6 +482,10 @@ class AllocateStmtHelper { if (!typeSpec) typeSpec = &alloc.type; + // Do not generate calls for non derived-type type spec. + if (!typeSpec->AsDerived()) + return; + assert(typeSpec && "type spec missing for polymorphic allocation"); std::string typeName = Fortran::lower::mangle::mangleName(typeSpec->derivedTypeSpec()); diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index 6177845af8f34..232dfada79194 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -49,4 +49,13 @@ subroutine check() ! CHECK: %[[BOX2:.*]] = fir.embox %[[DT2]] : (!fir.ref>) -> !fir.class> ! CHECK: %[[CLASS2:.*]] = fir.convert %[[BOX2]] : (!fir.class>) -> !fir.class> ! 
CHECK: fir.call @_QMpolymorphic_testPprint(%[[CLASS2]]) : (!fir.class>) -> () + + subroutine test_allocate_unlimited_polymorphic_non_derived() + class(*), pointer :: u + allocate(integer::u) + end subroutine + +! CHECK-LABEL: test_allocate_unlimited_polymorphic_non_derived +! CHECK-NOT: _FortranAPointerNullifyDerived +! CHECK: fir.call @_FortranAPointerAllocate end module From ea60545b0e55a3ab0e34637af14278279ab65bfb Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 31 Oct 2022 15:10:14 +0000 Subject: [PATCH 223/516] [AMDGPU] Create new instructions in SIInstrInfo::moveToVALU Create new VALU instructions in moveToVALU instead of mutating the existing SALU instruction. This makes it easier to add extra operands so we can convert to the VOP3 form of VALU instructions. NFCI but it does have the minor side effect of removing duplicate implicit operands that were present on the original SALU if they are default implicit operands for the VALU. Differential Revision: https://reviews.llvm.org/D137324 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 133 +++++++++--------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +- .../CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll | 14 +- llvm/test/CodeGen/AMDGPU/wqm.ll | 24 ++-- 4 files changed, 90 insertions(+), 83 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 27e0d56c76ec4..775da4759358f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6362,7 +6362,6 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; } - if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { // We cannot move this instruction to the VALU, so we should try to // legalize its operands instead. @@ -6372,43 +6371,90 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; } + // Handle converting generic instructions like COPY-to-SGPR into + // COPY-to-VGPR. 
+ if (NewOpcode == Opcode) { + Register DstReg = Inst.getOperand(0).getReg(); + const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); + + if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && + NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { + // Instead of creating a copy where src and dst are the same register + // class, we just replace all uses of dst with src. These kinds of + // copies interfere with the heuristics MachineSink uses to decide + // whether or not to split a critical edge. Since the pass assumes + // that copies will end up as machine instructions and not be + // eliminated. + addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); + MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); + MRI.clearKillFlags(Inst.getOperand(1).getReg()); + Inst.getOperand(0).setReg(DstReg); + + // Make sure we don't leave around a dead VGPR->SGPR copy. Normally + // these are deleted later, but at -O0 it would leave a suspicious + // looking illegal copy of an undef register. + for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) + Inst.removeOperand(I); + Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); + continue; + } + + Register NewDstReg = MRI.createVirtualRegister(NewDstRC); + MRI.replaceRegWith(DstReg, NewDstReg); + legalizeOperands(Inst, MDT); + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); + continue; + } + // Use the new VALU Opcode. - const MCInstrDesc &NewDesc = get(NewOpcode); - Inst.setDesc(NewDesc); + auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode)) + .setMIFlags(Inst.getFlags()); + for (const MachineOperand &Op : Inst.explicit_operands()) + NewInstr->addOperand(Op); // Remove any references to SCC. Vector instructions can't read from it, and // We're just about to add the implicit use / defs of VCC, and we don't want // both. 
- for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { - MachineOperand &Op = Inst.getOperand(i); - if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { + for (MachineOperand &Op : Inst.implicit_operands()) { + if (Op.getReg() == AMDGPU::SCC) { // Only propagate through live-def of SCC. if (Op.isDef() && !Op.isDead()) addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); if (Op.isUse()) - addSCCDefsToVALUWorklist(Op, Worklist); - Inst.removeOperand(i); + addSCCDefsToVALUWorklist(NewInstr, Worklist); } } + Inst.eraseFromParent(); + + Register NewDstReg; + if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) { + Register DstReg = NewInstr->getOperand(0).getReg(); + assert(DstReg.isVirtual()); + + // Update the destination register class. + const TargetRegisterClass *NewDstRC = + getDestEquivalentVGPRClass(*NewInstr); + assert(NewDstRC); + + NewDstReg = MRI.createVirtualRegister(NewDstRC); + MRI.replaceRegWith(DstReg, NewDstReg); + } + if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { // We are converting these to a BFE, so we need to add the missing // operands for the size and offset. unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; - Inst.addOperand(MachineOperand::CreateImm(0)); - Inst.addOperand(MachineOperand::CreateImm(Size)); - + NewInstr.addImm(0); + NewInstr.addImm(Size); } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { // The VALU version adds the second operand to the result, so insert an // extra 0 operand. - Inst.addOperand(MachineOperand::CreateImm(0)); + NewInstr.addImm(0); } - Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); - fixImplicitOperands(Inst); - if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { - const MachineOperand &OffsetWidthOp = Inst.getOperand(2); + const MachineOperand &OffsetWidthOp = NewInstr->getOperand(2); // If we need to move this to VGPRs, we need to unpack the second operand // back into the 2 separate ones for bit offset and width. 
assert(OffsetWidthOp.isImm() && @@ -6417,56 +6463,20 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. - Inst.removeOperand(2); // Remove old immediate. - Inst.addOperand(MachineOperand::CreateImm(Offset)); - Inst.addOperand(MachineOperand::CreateImm(BitWidth)); + NewInstr->removeOperand(2); + NewInstr.addImm(Offset); + NewInstr.addImm(BitWidth); } - bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); - Register NewDstReg; - if (HasDst) { - Register DstReg = Inst.getOperand(0).getReg(); - if (DstReg.isPhysical()) - continue; - - // Update the destination register class. - const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); - if (!NewDstRC) - continue; - - if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && - NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { - // Instead of creating a copy where src and dst are the same register - // class, we just replace all uses of dst with src. These kinds of - // copies interfere with the heuristics MachineSink uses to decide - // whether or not to split a critical edge. Since the pass assumes - // that copies will end up as machine instructions and not be - // eliminated. - addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); - MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); - MRI.clearKillFlags(Inst.getOperand(1).getReg()); - Inst.getOperand(0).setReg(DstReg); - - // Make sure we don't leave around a dead VGPR->SGPR copy. Normally - // these are deleted later, but at -O0 it would leave a suspicious - // looking illegal copy of an undef register. 
- for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) - Inst.removeOperand(I); - Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); - continue; - } - - NewDstReg = MRI.createVirtualRegister(NewDstRC); - MRI.replaceRegWith(DstReg, NewDstReg); - } + fixImplicitOperands(*NewInstr); // Legalize the operands - CreatedBBTmp = legalizeOperands(Inst, MDT); + CreatedBBTmp = legalizeOperands(*NewInstr, MDT); if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) CreatedBB = CreatedBBTmp; - if (HasDst) - addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); + if (NewDstReg) + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); } return CreatedBB; } @@ -7229,11 +7239,8 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op, // SCC must be changed to an instruction that defines VCC. This function makes // sure that the instruction that defines SCC is added to the moveToVALU // worklist. -void SIInstrInfo::addSCCDefsToVALUWorklist(MachineOperand &Op, +void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, SetVectorType &Worklist) const { - assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isUse()); - - MachineInstr *SCCUseInst = Op.getParent(); // Look for a preceding instruction that either defines VCC or SCC. If VCC // then there is nothing to do because the defining instruction has been // converted to a VALU already. 
If SCC then that instruction needs to be diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 05589010654aa..bf4330ed00683 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -130,7 +130,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineInstr &SCCDefInst, SetVectorType &Worklist, Register NewCond = Register()) const; - void addSCCDefsToVALUWorklist(MachineOperand &Op, + void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, SetVectorType &Worklist) const; const TargetRegisterClass * diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll index c04ab319cc8c5..e6b6bd62edf4c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll @@ -12,7 +12,7 @@ define amdgpu_ps float @test1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -33,7 +33,7 @@ define amdgpu_ps float @test2(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -58,7 +58,7 @@ define amdgpu_ps float @test_softwqm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: v_add_f32_e32 v1, v1, v2 ; 
CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 idxen ; CHECK-NEXT: v_add_f32_e32 v0, v1, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ; return to shader part epilog main_body: @@ -124,7 +124,7 @@ define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -156,7 +156,7 @@ define amdgpu_ps float @test_strict_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -191,7 +191,7 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; CHECK-NEXT: .LBB6_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: v_mov_b32_e32 v0, v2 @@ -246,7 +246,7 @@ define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inr ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: 
v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; CHECK-NEXT: .LBB7_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: s_and_b64 exec, exec, s[14:15] diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 2167a5ab8f42d..7e612f53151c2 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -204,7 +204,7 @@ define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -219,7 +219,7 @@ define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -243,7 +243,7 @@ define amdgpu_ps float @test6(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -258,7 +258,7 @@ define amdgpu_ps float @test6(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v1, 
v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -496,7 +496,7 @@ define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -518,7 +518,7 @@ define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -962,7 +962,7 @@ define amdgpu_ps float @test_strict_wqm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -986,7 +986,7 @@ define amdgpu_ps float @test_strict_wqm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; 
kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -1176,7 +1176,7 @@ define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_nop 0 ; GFX9-W64-NEXT: buffer_load_dword v2, v2, s[0:3], 0 idxen ; GFX9-W64-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $scc killed $exec -; GFX9-W64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_u32_e32 v1, v2, v1 @@ -1193,7 +1193,7 @@ define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v2, v0, s[0:3], 0 idxen ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $scc killed $exec -; GFX10-W32-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_nc_u32_e32 v1, v1, v2 @@ -2500,7 +2500,7 @@ define amdgpu_ps float @test_strict_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -2522,7 +2522,7 @@ define amdgpu_ps float @test_strict_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; 
GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog From 722a0efe31e16e6a98dc3742cf6101d930ba3479 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 4 Nov 2022 08:24:52 +0100 Subject: [PATCH 224/516] Revert "Implement CWG2631" Breaks the build on some platforms. This reverts commit bf1e235695a7acdc3e868217e69d5b31ada06cb3. --- clang/docs/ReleaseNotes.rst | 5 - clang/include/clang/AST/ExprCXX.h | 108 ++------ clang/include/clang/AST/Stmt.h | 7 - .../clang/Basic/DiagnosticSemaKinds.td | 4 - clang/include/clang/Sema/Sema.h | 87 +----- clang/lib/AST/ASTImporter.cpp | 21 +- clang/lib/AST/ExprCXX.cpp | 68 +---- clang/lib/Parse/ParseCXXInlineMethods.cpp | 5 - clang/lib/Parse/ParseDeclCXX.cpp | 6 +- clang/lib/Sema/SemaDeclCXX.cpp | 90 ++++-- clang/lib/Sema/SemaExpr.cpp | 256 ++---------------- clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 +- clang/lib/Sema/TreeTransform.h | 21 +- clang/lib/Serialization/ASTReaderStmt.cpp | 12 +- clang/lib/Serialization/ASTWriterStmt.cpp | 6 - clang/test/CXX/class/class.local/p1-0x.cpp | 4 +- .../default-arguments-with-immediate.cpp | 54 ---- .../default-argument-with-immediate-calls.cpp | 34 --- .../cxx2a-consteval-default-params.cpp | 68 ----- clang/test/SemaCXX/source_location.cpp | 64 +---- 20 files changed, 144 insertions(+), 784 deletions(-) delete mode 100644 clang/test/CodeGenCXX/default-arguments-with-immediate.cpp delete mode 100644 clang/test/PCH/default-argument-with-immediate-calls.cpp delete mode 100644 clang/test/SemaCXX/cxx2a-consteval-default-params.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index bd2d811b30668..ad1a00b4bbcc4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -578,11 +578,6 
@@ C++ Language Changes in Clang conforming GNU extensions. Projects incompatible with C++17 can add ``-std=gnu++14`` to their build settings to restore the previous behaviour. - Implemented DR2358 allowing init captures in lambdas in default arguments. -- Implemented DR2631. Invalid ``consteval`` calls in default arguments and default - member initializers are diagnosed when and if the default is used. - This Fixes `Issue 56379 `_ - and changes the value of ``std::source_location::current()`` - used in default parameters calls compared to previous versions of Clang. C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 098720d9469f0..0b927c0294752 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -1245,12 +1245,8 @@ class CXXThrowExpr : public Expr { /// This wraps up a function call argument that was created from the /// corresponding parameter's default argument, when the call did not /// explicitly supply arguments for all of the parameters. -class CXXDefaultArgExpr final - : public Expr, - private llvm::TrailingObjects { +class CXXDefaultArgExpr final : public Expr { friend class ASTStmtReader; - friend class ASTReader; - friend TrailingObjects; /// The parameter whose default is being used. ParmVarDecl *Param; @@ -1259,7 +1255,7 @@ class CXXDefaultArgExpr final DeclContext *UsedContext; CXXDefaultArgExpr(StmtClass SC, SourceLocation Loc, ParmVarDecl *Param, - Expr *RewrittenExpr, DeclContext *UsedContext) + DeclContext *UsedContext) : Expr(SC, Param->hasUnparsedDefaultArg() ? 
Param->getType().getNonReferenceType() @@ -1268,58 +1264,28 @@ class CXXDefaultArgExpr final Param->getDefaultArg()->getObjectKind()), Param(Param), UsedContext(UsedContext) { CXXDefaultArgExprBits.Loc = Loc; - CXXDefaultArgExprBits.HasRewrittenInit = RewrittenExpr != nullptr; - if (RewrittenExpr) - *getTrailingObjects() = RewrittenExpr; setDependence(computeDependence(this)); } - CXXDefaultArgExpr(EmptyShell Empty, bool HasRewrittenInit) - : Expr(CXXDefaultArgExprClass, Empty) { - CXXDefaultArgExprBits.HasRewrittenInit = HasRewrittenInit; - } - - size_t numTrailingObjects() const { - return CXXDefaultArgExprBits.HasRewrittenInit; - } - public: - static CXXDefaultArgExpr *CreateEmpty(const ASTContext &C, - bool HasRewrittenInit); + CXXDefaultArgExpr(EmptyShell Empty) : Expr(CXXDefaultArgExprClass, Empty) {} // \p Param is the parameter whose default argument is used by this // expression. static CXXDefaultArgExpr *Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, Expr *RewrittenExpr, - DeclContext *UsedContext); + ParmVarDecl *Param, + DeclContext *UsedContext) { + return new (C) + CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, UsedContext); + } + // Retrieve the parameter that the argument was created from. const ParmVarDecl *getParam() const { return Param; } ParmVarDecl *getParam() { return Param; } - bool hasRewrittenInit() const { - return CXXDefaultArgExprBits.HasRewrittenInit; - } - - // Retrieve the argument to the function call. - Expr *getExpr(); - const Expr *getExpr() const { - return const_cast(this)->getExpr(); - } - - Expr *getRewrittenExpr() { - return hasRewrittenInit() ? *getTrailingObjects() : nullptr; - } - - const Expr *getRewrittenExpr() const { - return const_cast(this)->getRewrittenExpr(); - } - - // Retrieve the rewritten init expression (for an init expression containing - // immediate calls) with the top level FullExpr and ConstantExpr stripped off. 
- Expr *getAdjustedRewrittenExpr(); - const Expr *getAdjustedRewrittenExpr() const { - return const_cast(this)->getAdjustedRewrittenExpr(); - } + // Retrieve the actual argument to the function call. + const Expr *getExpr() const { return getParam()->getDefaultArg(); } + Expr *getExpr() { return getParam()->getDefaultArg(); } const DeclContext *getUsedContext() const { return UsedContext; } DeclContext *getUsedContext() { return UsedContext; } @@ -1356,13 +1322,10 @@ class CXXDefaultArgExpr final /// is implicitly used in a mem-initializer-list in a constructor /// (C++11 [class.base.init]p8) or in aggregate initialization /// (C++1y [dcl.init.aggr]p7). -class CXXDefaultInitExpr final - : public Expr, - private llvm::TrailingObjects { - - friend class ASTStmtReader; +class CXXDefaultInitExpr : public Expr { friend class ASTReader; - friend TrailingObjects; + friend class ASTStmtReader; + /// The field whose default is being used. FieldDecl *Field; @@ -1370,29 +1333,16 @@ class CXXDefaultInitExpr final DeclContext *UsedContext; CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, QualType Ty, DeclContext *UsedContext, - Expr *RewrittenInitExpr); - - CXXDefaultInitExpr(EmptyShell Empty, bool HasRewrittenInit) - : Expr(CXXDefaultInitExprClass, Empty) { - CXXDefaultInitExprBits.HasRewrittenInit = HasRewrittenInit; - } + FieldDecl *Field, QualType Ty, DeclContext *UsedContext); - size_t numTrailingObjects() const { - return CXXDefaultInitExprBits.HasRewrittenInit; - } + CXXDefaultInitExpr(EmptyShell Empty) : Expr(CXXDefaultInitExprClass, Empty) {} public: - static CXXDefaultInitExpr *CreateEmpty(const ASTContext &C, - bool HasRewrittenInit); /// \p Field is the non-static data member whose default initializer is used /// by this expression. 
static CXXDefaultInitExpr *Create(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, DeclContext *UsedContext, - Expr *RewrittenInitExpr); - - bool hasRewrittenInit() const { - return CXXDefaultInitExprBits.HasRewrittenInit; + FieldDecl *Field, DeclContext *UsedContext) { + return new (Ctx) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), UsedContext); } /// Get the field whose initializer will be used. @@ -1400,23 +1350,13 @@ class CXXDefaultInitExpr final const FieldDecl *getField() const { return Field; } /// Get the initialization expression that will be used. - Expr *getExpr(); const Expr *getExpr() const { - return const_cast(this)->getExpr(); - } - - /// Retrieve the initializing expression with evaluated immediate calls, if - /// any. - const Expr *getRewrittenExpr() const { - assert(hasRewrittenInit() && "expected a rewritten init expression"); - return *getTrailingObjects(); + assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); + return Field->getInClassInitializer(); } - - /// Retrieve the initializing expression with evaluated immediate calls, if - /// any. - Expr *getRewrittenExpr() { - assert(hasRewrittenInit() && "expected a rewritten init expression"); - return *getTrailingObjects(); + Expr *getExpr() { + assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); + return Field->getInClassInitializer(); } const DeclContext *getUsedContext() const { return UsedContext; } diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index a894111be896a..49a66a1ea5b86 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -686,9 +686,6 @@ class alignas(void *) Stmt { unsigned : NumExprBits; - /// Whether this CXXDefaultArgExpr rewrote its argument and stores a copy. - unsigned HasRewrittenInit : 1; - /// The location where the default argument expression was used. 
SourceLocation Loc; }; @@ -699,10 +696,6 @@ class alignas(void *) Stmt { unsigned : NumExprBits; - /// Whether this CXXDefaultInitExprBitfields rewrote its argument and stores - /// a copy. - unsigned HasRewrittenInit : 1; - /// The location where the default initializer expression was used. SourceLocation Loc; }; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a720da687550a..1b1db765fa7a9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2644,10 +2644,6 @@ def err_invalid_consteval_take_address : Error< " of an immediate invocation">; def err_invalid_consteval_call : Error< "call to consteval function %q0 is not a constant expression">; -def note_invalid_consteval_initializer : Note< - "in the default initalizer of %0">; -def note_invalid_consteval_initializer_here : Note< - "initialized here %0">; def err_invalid_consteval_decl_kind : Error< "%0 cannot be declared consteval">; def err_invalid_constexpr : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f5151ac7f4c3e..e8c9cb966bae7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1330,25 +1330,6 @@ class Sema final { bool InDiscardedStatement; bool InImmediateFunctionContext; - bool IsCurrentlyCheckingDefaultArgumentOrInitializer = false; - - // When evaluating immediate functions in the initializer of a default - // argument or default member initializer, this is the declaration whose - // default initializer is being evaluated and the location of the call - // or constructor definition. 
- struct InitializationContext { - InitializationContext(SourceLocation Loc, ValueDecl *Decl, - DeclContext *Context) - : Loc(Loc), Decl(Decl), Context(Context) { - assert(Decl && Context && "invalid initialization context"); - }; - - SourceLocation Loc; - ValueDecl *Decl = nullptr; - DeclContext *Context = nullptr; - }; - llvm::Optional DelayedDefaultInitializationContext; - ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context, unsigned NumCleanupObjects, CleanupInfo ParentCleanup, @@ -6215,22 +6196,19 @@ class Sema final { bool IsStdInitListInitialization, bool RequiresZeroInit, unsigned ConstructKind, SourceRange ParenRange); - ExprResult ConvertMemberDefaultInitExpression(FieldDecl *FD, Expr *InitExpr, - SourceLocation InitLoc); - ExprResult BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field); /// Instantiate or parse a C++ default argument expression as necessary. /// Return true on error. bool CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param, Expr *Init = nullptr, - bool SkipImmediateInvocations = true); + ParmVarDecl *Param); /// BuildCXXDefaultArgExpr - Creates a CXXDefaultArgExpr, instantiating /// the default expr if needed. - ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param, Expr *Init = nullptr); + ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, + FunctionDecl *FD, + ParmVarDecl *Param); /// FinalizeVarWithDestructor - Prepare for calling destructor on the /// constructed variable. 
@@ -9634,63 +9612,6 @@ class Sema final { return ExprEvalContexts.back().isImmediateFunctionContext(); } - bool isCheckingDefaultArgumentOrInitializer() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - const ExpressionEvaluationContextRecord &Ctx = ExprEvalContexts.back(); - return (Ctx.Context == - ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || - Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer; - } - - bool isCheckingDefaultArgumentOrInitializerOfOuterEntity() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { - if ((Ctx.Context == - ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || - Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer) - return true; - if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || - Ctx.isUnevaluated()) - return false; - } - return false; - } - - llvm::Optional - InnermostDeclarationWithDelayedImmediateInvocations() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { - if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && - Ctx.DelayedDefaultInitializationContext) - return Ctx.DelayedDefaultInitializationContext; - if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || - Ctx.isUnevaluated()) - break; - } - return llvm::None; - } - - llvm::Optional - OutermostDeclarationWithDelayedImmediateInvocations() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - llvm::Optional - Res; - for (auto &Ctx : llvm::reverse(ExprEvalContexts)) { - if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && - !Ctx.DelayedDefaultInitializationContext && Res) - break; - if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || - Ctx.isUnevaluated()) - break; - Res = 
Ctx.DelayedDefaultInitializationContext; - } - return Res; - } - /// RAII class used to determine whether SFINAE has /// trapped any errors that occur during template argument /// deduction. diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 188c0f593a9ef..631dfaebabbd6 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -7687,16 +7687,9 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (Error Err = ImportDefaultArgOfParmVarDecl(*FromParam, ToParam)) return std::move(Err); } - Expr *RewrittenInit = nullptr; - if (E->hasRewrittenInit()) { - ExpectedExpr ExprOrErr = import(E->getExpr()); - if (!ExprOrErr) - return ExprOrErr.takeError(); - RewrittenInit = ExprOrErr.get(); - } + return CXXDefaultArgExpr::Create(Importer.getToContext(), *ToUsedLocOrErr, - *ToParamOrErr, RewrittenInit, - *UsedContextOrErr); + *ToParamOrErr, *UsedContextOrErr); } ExpectedStmt @@ -8388,16 +8381,8 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { ToField->setInClassInitializer(*ToInClassInitializerOrErr); } - Expr *RewrittenInit = nullptr; - if (E->hasRewrittenInit()) { - ExpectedExpr ExprOrErr = import(E->getExpr()); - if (!ExprOrErr) - return ExprOrErr.takeError(); - RewrittenInit = ExprOrErr.get(); - } - return CXXDefaultInitExpr::Create(Importer.getToContext(), *ToBeginLocOrErr, - ToField, *UsedContextOrErr, RewrittenInit); + ToField, *UsedContextOrErr); } ExpectedStmt ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) { diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 6a6f692dec787..3bf3eab72846c 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -949,43 +949,9 @@ const IdentifierInfo *UserDefinedLiteral::getUDSuffix() const { return cast(getCalleeDecl())->getLiteralIdentifier(); } -CXXDefaultArgExpr *CXXDefaultArgExpr::CreateEmpty(const ASTContext &C, - bool HasRewrittenInit) { - size_t Size = 
totalSizeToAlloc(HasRewrittenInit); - auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); - return new (Mem) CXXDefaultArgExpr(EmptyShell(), HasRewrittenInit); -} - -CXXDefaultArgExpr *CXXDefaultArgExpr::Create(const ASTContext &C, - SourceLocation Loc, - ParmVarDecl *Param, - Expr *RewrittenExpr, - DeclContext *UsedContext) { - size_t Size = totalSizeToAlloc(RewrittenExpr != nullptr); - auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); - return new (Mem) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, - RewrittenExpr, UsedContext); -} - -Expr *CXXDefaultArgExpr::getExpr() { - return CXXDefaultArgExprBits.HasRewrittenInit ? getAdjustedRewrittenExpr() - : getParam()->getDefaultArg(); -} - -Expr *CXXDefaultArgExpr::getAdjustedRewrittenExpr() { - assert(hasRewrittenInit() && - "expected this CXXDefaultArgExpr to have a rewritten init."); - Expr *Init = getRewrittenExpr(); - if (auto *E = dyn_cast_if_present(Init)) - if (!isa(E)) - return E->getSubExpr(); - return Init; -} - CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, FieldDecl *Field, - QualType Ty, DeclContext *UsedContext, - Expr *RewrittenInitExpr) + QualType Ty, DeclContext *UsedContext) : Expr(CXXDefaultInitExprClass, Ty.getNonLValueExprType(Ctx), Ty->isLValueReferenceType() ? VK_LValue : Ty->isRValueReferenceType() ? 
VK_XValue @@ -993,43 +959,11 @@ CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, /*FIXME*/ OK_Ordinary), Field(Field), UsedContext(UsedContext) { CXXDefaultInitExprBits.Loc = Loc; - CXXDefaultInitExprBits.HasRewrittenInit = RewrittenInitExpr != nullptr; - - if (CXXDefaultInitExprBits.HasRewrittenInit) - *getTrailingObjects() = RewrittenInitExpr; - assert(Field->hasInClassInitializer()); setDependence(computeDependence(this)); } -CXXDefaultInitExpr *CXXDefaultInitExpr::CreateEmpty(const ASTContext &C, - bool HasRewrittenInit) { - size_t Size = totalSizeToAlloc(HasRewrittenInit); - auto *Mem = C.Allocate(Size, alignof(CXXDefaultInitExpr)); - return new (Mem) CXXDefaultInitExpr(EmptyShell(), HasRewrittenInit); -} - -CXXDefaultInitExpr *CXXDefaultInitExpr::Create(const ASTContext &Ctx, - SourceLocation Loc, - FieldDecl *Field, - DeclContext *UsedContext, - Expr *RewrittenInitExpr) { - - size_t Size = totalSizeToAlloc(RewrittenInitExpr != nullptr); - auto *Mem = Ctx.Allocate(Size, alignof(CXXDefaultArgExpr)); - return new (Mem) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), - UsedContext, RewrittenInitExpr); -} - -Expr *CXXDefaultInitExpr::getExpr() { - assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); - if (hasRewrittenInit()) - return getRewrittenExpr(); - - return Field->getInClassInitializer(); -} - CXXTemporary *CXXTemporary::Create(const ASTContext &C, const CXXDestructorDecl *Destructor) { return new (C) CXXTemporary(Destructor); diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index 3a7f5426d4a70..d918ea26b9d9d 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -648,11 +648,6 @@ void Parser::ParseLexedMemberInitializer(LateParsedMemberInitializer &MI) { Actions.ActOnStartCXXInClassMemberInitializer(); - // The initializer isn't actually potentially evaluated unless it is - // used. 
- EnterExpressionEvaluationContext Eval( - Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed); - ExprResult Init = ParseCXXMemberInitializer(MI.Field, /*IsFunction=*/false, EqualLoc); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index a2f07ea5d59fc..bbffff5394f04 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -3184,11 +3184,7 @@ ExprResult Parser::ParseCXXMemberInitializer(Decl *D, bool IsFunction, "Data member initializer not starting with '=' or '{'"); EnterExpressionEvaluationContext Context( - Actions, - isa_and_present(D) - ? Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed - : Sema::ExpressionEvaluationContext::PotentiallyEvaluated, - D); + Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, D); if (TryConsumeToken(tok::equal, EqualLoc)) { if (Tok.is(tok::kw_delete)) { // In principle, an initializer of '= delete p;' is legal, but it will diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index c78ce37f372a0..ea7997b347959 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -4039,21 +4039,6 @@ ExprResult Sema::ActOnRequiresClause(ExprResult ConstraintExpr) { return ConstraintExpr; } -ExprResult Sema::ConvertMemberDefaultInitExpression(FieldDecl *FD, - Expr *InitExpr, - SourceLocation InitLoc) { - InitializedEntity Entity = - InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); - InitializationKind Kind = - FD->getInClassInitStyle() == ICIS_ListInit - ? 
InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), - InitExpr->getBeginLoc(), - InitExpr->getEndLoc()) - : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); - InitializationSequence Seq(*this, Entity, Kind, InitExpr); - return Seq.Perform(*this, Entity, Kind, InitExpr); -} - /// This is invoked after parsing an in-class initializer for a /// non-static C++ class member, and after instantiating an in-class initializer /// in a class template. Such actions are deferred until the class is complete. @@ -4082,7 +4067,16 @@ void Sema::ActOnFinishCXXInClassMemberInitializer(Decl *D, ExprResult Init = InitExpr; if (!FD->getType()->isDependentType() && !InitExpr->isTypeDependent()) { - Init = ConvertMemberDefaultInitExpression(FD, InitExpr, InitLoc); + InitializedEntity Entity = + InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); + InitializationKind Kind = + FD->getInClassInitStyle() == ICIS_ListInit + ? InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), + InitExpr->getBeginLoc(), + InitExpr->getEndLoc()) + : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); + InitializationSequence Seq(*this, Entity, Kind, InitExpr); + Init = Seq.Perform(*this, Entity, Kind, InitExpr); if (Init.isInvalid()) { FD->setInvalidDecl(); return; @@ -15623,6 +15617,70 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType, Constructor); } +ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { + assert(Field->hasInClassInitializer()); + + // If we already have the in-class initializer nothing needs to be done. + if (Field->getInClassInitializer()) + return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); + + // If we might have already tried and failed to instantiate, don't try again. + if (Field->isInvalidDecl()) + return ExprError(); + + // Maybe we haven't instantiated the in-class initializer. Go check the + // pattern FieldDecl to see if it has one. 
+ CXXRecordDecl *ParentRD = cast(Field->getParent()); + + if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { + CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); + DeclContext::lookup_result Lookup = + ClassPattern->lookup(Field->getDeclName()); + + FieldDecl *Pattern = nullptr; + for (auto *L : Lookup) { + if (isa(L)) { + Pattern = cast(L); + break; + } + } + assert(Pattern && "We must have set the Pattern!"); + + if (!Pattern->hasInClassInitializer() || + InstantiateInClassInitializer(Loc, Field, Pattern, + getTemplateInstantiationArgs(Field))) { + // Don't diagnose this again. + Field->setInvalidDecl(); + return ExprError(); + } + return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); + } + + // DR1351: + // If the brace-or-equal-initializer of a non-static data member + // invokes a defaulted default constructor of its class or of an + // enclosing class in a potentially evaluated subexpression, the + // program is ill-formed. + // + // This resolution is unworkable: the exception specification of the + // default constructor can be needed in an unevaluated context, in + // particular, in the operand of a noexcept-expression, and we can be + // unable to compute an exception specification for an enclosed class. + // + // Any attempt to resolve the exception specification of a defaulted default + // constructor before the initializer is lexically complete will ultimately + // come here at which point we can diagnose it. + RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); + Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) + << OutermostClass << Field; + Diag(Field->getEndLoc(), + diag::note_default_member_initializer_not_yet_parsed); + // Recover by marking the field invalid, unless we're in a SFINAE context. 
+ if (!isSFINAEContext()) + Field->setInvalidDecl(); + return ExprError(); +} + void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) { if (VD->isInvalidDecl()) return; // If initializing the variable failed, don't also diagnose problems with diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2849bf45ead5b..2493b4a76d5e1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5856,10 +5856,8 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param, Expr *RewrittenInit, - bool SkipImmediateInvocations) { + ParmVarDecl *Param) { if (Param->hasUnparsedDefaultArg()) { - assert(!RewrittenInit && "Should not have a rewritten init expression yet"); // If we've already cleared out the location for the default argument, // that means we're parsing it right now. if (!UnparsedDefaultArgLocs.count(Param)) { @@ -5876,14 +5874,11 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, return true; } - if (Param->hasUninstantiatedDefaultArg()) { - assert(!RewrittenInit && "Should not have a rewitten init expression yet"); - if (InstantiateDefaultArgument(CallLoc, FD, Param)) - return true; - } + if (Param->hasUninstantiatedDefaultArg() && + InstantiateDefaultArgument(CallLoc, FD, Param)) + return true; - Expr *Init = RewrittenInit ? RewrittenInit : Param->getInit(); - assert(Init && "default argument but no initializer?"); + assert(Param->hasInit() && "default argument but no initializer?"); // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll @@ -5892,237 +5887,34 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, // bound temporaries; see the comment in PR5810. 
// We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. - if (auto *InitWithCleanup = dyn_cast(Init)) { + if (auto Init = dyn_cast(Param->getInit())) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. - Cleanup.setExprNeedsCleanups(InitWithCleanup->cleanupsHaveSideEffects()); + Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); + // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. - assert(!InitWithCleanup->getNumObjects() && + assert(!Init->getNumObjects() && "default argument expression has capturing blocks?"); } + + // We already type-checked the argument, so we know it works. + // Just mark all of the declarations in this potentially-evaluated expression + // as being "referenced". EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); - ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer = - SkipImmediateInvocations; - MarkDeclarationsReferencedInExpr(Init, /*SkipLocalVariables*/ true); + MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), + /*SkipLocalVariables=*/true); return false; } -struct ImmediateCallVisitor : public RecursiveASTVisitor { - bool HasImmediateCalls = false; - - bool VisitCallExpr(CallExpr *E) { - if (const FunctionDecl *FD = E->getDirectCallee()) - HasImmediateCalls |= FD->isConsteval(); - return RecursiveASTVisitor::VisitStmt(E); - } - - // SourceLocExpr are not immediate invocations - // but CXXDefaultInitExpr/CXXDefaultArgExpr containing a SourceLocExpr - // need to be rebuilt so that they refer to the correct SourceLocation and - // DeclContext. 
- bool VisitSourceLocExpr(SourceLocExpr *E) { - HasImmediateCalls = true; - return RecursiveASTVisitor::VisitStmt(E); - } - - // A nested lambda might have parameters with immediate invocations - // in their default arguments. - // The compound statement is not visited (as it does not constitute a - // subexpression). - // FIXME: We should consider visiting and transforming captures - // with init expressions. - bool VisitLambdaExpr(LambdaExpr *E) { - return VisitCXXMethodDecl(E->getCallOperator()); - } - - // Blocks don't support default parameters, and, as for lambdas, - // we don't consider their body a subexpression. - bool VisitBlockDecl(BlockDecl *B) { return false; } - - bool VisitCompoundStmt(CompoundStmt *B) { - assert("Unexpected Compound statement in default parameter or initializer"); - return false; - } - - bool VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { - return TraverseStmt(E->getExpr()); - } - - bool VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { - return TraverseStmt(E->getExpr()); - } -}; - -struct EnsureImmediateInvocationInDefaultArgs - : TreeTransform { - EnsureImmediateInvocationInDefaultArgs(Sema &SemaRef) - : TreeTransform(SemaRef) {} - - // Lambda can only have immediate invocations in the default - // args of their parameters, which is transformed upon calling the closure. - // The body is not a subexpression, so we have nothing to do. - // FIXME: Immediate calls in capture initializers should be transformed. 
- ExprResult TransformLambdaExpr(LambdaExpr *E) { return E; } - ExprResult TransformBlockExpr(BlockExpr *E) { return E; } -}; - ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, - FunctionDecl *FD, ParmVarDecl *Param, - Expr *Init) { + FunctionDecl *FD, ParmVarDecl *Param) { assert(Param->hasDefaultArg() && "can't build nonexistent default arg"); - - bool NestedDefaultChecking = - isCheckingDefaultArgumentOrInitializerOfOuterEntity(); - - llvm::Optional - InitializationContext = - OutermostDeclarationWithDelayedImmediateInvocations(); - if (!InitializationContext.has_value()) - InitializationContext.emplace(CallLoc, Param, CurContext); - - if (!Init && !Param->hasUnparsedDefaultArg()) { - // Mark that we are replacing a default argument first. - // If we are instantiating a template we won't have to - // retransform immediate calls. - EnterExpressionEvaluationContext EvalContext( - *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); - ExprEvalContexts.back().DelayedDefaultInitializationContext = { - CallLoc, Param, CurContext}; - - if (Param->hasUninstantiatedDefaultArg()) { - if (InstantiateDefaultArgument(CallLoc, FD, Param)) - return ExprError(); - } else { - // CWG2631 - // An immediate invocation that is not evaluated where it appears is - // evaluated and checked for whether it is a constant expression at the - // point where the enclosing initializer is used in a function call. 
- ImmediateCallVisitor V; - if (!NestedDefaultChecking) - V.TraverseDecl(Param); - if (V.HasImmediateCalls) { - EnsureImmediateInvocationInDefaultArgs Immediate(*this); - ExprResult Res = Immediate.TransformExpr(Param->getInit()); - if (Res.isInvalid()) - return ExprError(); - Res = ConvertParamDefaultArgument(Param, Res.get(), - Res.get()->getBeginLoc()); - if (Res.isInvalid()) - return ExprError(); - Init = Res.get(); - } - } - } - - if (CheckCXXDefaultArgExpr( - CallLoc, FD, Param, Init, - /*SkipImmediateInvocations=*/NestedDefaultChecking)) + if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) return ExprError(); - - return CXXDefaultArgExpr::Create(Context, InitializationContext->Loc, Param, - Init, InitializationContext->Context); -} - -ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - assert(Field->hasInClassInitializer()); - - // If we might have already tried and failed to instantiate, don't try again. - if (Field->isInvalidDecl()) - return ExprError(); - - auto *ParentRD = cast(Field->getParent()); - - llvm::Optional - InitializationContext = - OutermostDeclarationWithDelayedImmediateInvocations(); - if (!InitializationContext.has_value()) - InitializationContext.emplace(Loc, Field, CurContext); - - Expr *Init = nullptr; - - bool NestedDefaultChecking = - isCheckingDefaultArgumentOrInitializerOfOuterEntity(); - - if (!Field->getInClassInitializer()) { - // Maybe we haven't instantiated the in-class initializer. Go check the - // pattern FieldDecl to see if it has one. 
- if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { - CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); - DeclContext::lookup_result Lookup = - ClassPattern->lookup(Field->getDeclName()); - - FieldDecl *Pattern = nullptr; - for (auto *L : Lookup) { - if ((Pattern = dyn_cast(L))) - break; - } - assert(Pattern && "We must have set the Pattern!"); - if (!Pattern->hasInClassInitializer() || - InstantiateInClassInitializer(Loc, Field, Pattern, - getTemplateInstantiationArgs(Field))) { - Field->setInvalidDecl(); - return ExprError(); - } - } - } else { - // CWG2631 - // An immediate invocation that is not evaluated where it appears is - // evaluated and checked for whether it is a constant expression at the - // point where the enclosing initializer is used in a [...] a constructor - // definition, or an aggregate initialization. - EnterExpressionEvaluationContext EvalContext( - *this, ExpressionEvaluationContext::PotentiallyEvaluated, Field); - ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field, - CurContext}; - - ImmediateCallVisitor V; - if (!NestedDefaultChecking) - V.TraverseDecl(Field); - if (V.HasImmediateCalls) { - EnsureImmediateInvocationInDefaultArgs Immediate(*this); - ExprResult Res = Immediate.TransformExpr(Field->getInClassInitializer()); - if (!Res.isInvalid()) - Res = ConvertMemberDefaultInitExpression(Field, Res.get(), Loc); - if (Res.isInvalid()) { - Field->setInvalidDecl(); - return ExprError(); - } - Init = Res.get(); - } else if (!NestedDefaultChecking) { - MarkDeclarationsReferencedInExpr(Field->getInClassInitializer()); - } - } - if (Field->getInClassInitializer()) - return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc, - Field, InitializationContext->Context, - Init); - - // DR1351: - // If the brace-or-equal-initializer of a non-static data member - // invokes a defaulted default constructor of its class or of an - // enclosing class in a potentially 
evaluated subexpression, the - // program is ill-formed. - // - // This resolution is unworkable: the exception specification of the - // default constructor can be needed in an unevaluated context, in - // particular, in the operand of a noexcept-expression, and we can be - // unable to compute an exception specification for an enclosed class. - // - // Any attempt to resolve the exception specification of a defaulted default - // constructor before the initializer is lexically complete will ultimately - // come here at which point we can diagnose it. - RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); - Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) - << OutermostClass << Field; - Diag(Field->getEndLoc(), - diag::note_default_member_initializer_not_yet_parsed); - // Recover by marking the field invalid, unless we're in a SFINAE context. - if (!isSFINAEContext()) - Field->setInvalidDecl(); - return ExprError(); + return CXXDefaultArgExpr::Create(Context, CallLoc, Param, CurContext); } Sema::VariadicCallType @@ -17747,7 +17539,6 @@ void Sema::CheckUnusedVolatileAssignment(Expr *E) { ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) { if (isUnevaluatedContext() || !E.isUsable() || !Decl || !Decl->isConsteval() || isConstantEvaluated() || - isCheckingDefaultArgumentOrInitializer() || RebuildingImmediateInvocation || isImmediateFunctionContext()) return E; @@ -17793,14 +17584,8 @@ static void EvaluateAndDiagnoseImmediateInvocation( FD = Call->getConstructor(); else llvm_unreachable("unhandled decl kind"); - assert(FD && FD->isConsteval()); + assert(FD->isConsteval()); SemaRef.Diag(CE->getBeginLoc(), diag::err_invalid_consteval_call) << FD; - if (auto Context = - SemaRef.InnermostDeclarationWithDelayedImmediateInvocations()) { - SemaRef.Diag(Context->Loc, diag::note_invalid_consteval_initializer) - << Context->Decl; - SemaRef.Diag(Context->Decl->getBeginLoc(), diag::note_declared_at); - } for (auto &Note 
: Notes) SemaRef.Diag(Note.first, Note.second); return; @@ -19946,8 +19731,7 @@ void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { if (auto *FD = dyn_cast(E->getDecl())) if (!isUnevaluatedContext() && !isConstantEvaluated() && - !isImmediateFunctionContext() && - !isCheckingDefaultArgumentOrInitializer() && FD->isConsteval() && + !isImmediateFunctionContext() && FD->isConsteval() && !RebuildingImmediateInvocation && !FD->isDependentContext()) ExprEvalContexts.back().ReferenceToConsteval.insert(E); MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse, diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 709162e01809b..9e41dfbfdbe95 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1978,9 +1978,9 @@ ExprResult TemplateInstantiator::TransformCXXDefaultArgExpr( assert(!cast(E->getParam()->getDeclContext())-> getDescribedFunctionTemplate() && "Default arg expressions are never formed in dependent cases."); - return SemaRef.BuildCXXDefaultArgExpr( - E->getUsedLocation(), cast(E->getParam()->getDeclContext()), - E->getParam()); + return SemaRef.BuildCXXDefaultArgExpr(E->getUsedLocation(), + cast(E->getParam()->getDeclContext()), + E->getParam()); } template @@ -3407,8 +3407,6 @@ bool Sema::InstantiateInClassInitializer( ContextRAII SavedContext(*this, Instantiation->getParent()); EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); - ExprEvalContexts.back().DelayedDefaultInitializationContext = { - PointOfInstantiation, Instantiation, CurContext}; LocalInstantiationScope Scope(*this, true); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index ead72463aca78..ab34a9d611b9c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3172,10 +3172,9 @@ class TreeTransform { /// By default, builds a new default-argument 
expression, which does not /// require any semantic analysis. Subclasses may override this routine to /// provide different behavior. - ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param, - Expr *RewrittenExpr) { + ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param) { return CXXDefaultArgExpr::Create(getSema().Context, Loc, Param, - RewrittenExpr, getSema().CurContext); + getSema().CurContext); } /// Build a new C++11 default-initialization expression. @@ -3185,7 +3184,8 @@ class TreeTransform { /// routine to provide different behavior. ExprResult RebuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - return getSema().BuildCXXDefaultInitExpr(Loc, Field); + return CXXDefaultInitExpr::Create(getSema().Context, Loc, Field, + getSema().CurContext); } /// Build a new C++ zero-initialization expression. @@ -12094,20 +12094,11 @@ TreeTransform::TransformCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (!Param) return ExprError(); - ExprResult InitRes; - if (E->hasRewrittenInit()) { - InitRes = getDerived().TransformExpr(E->getRewrittenExpr()); - if (InitRes.isInvalid()) - return ExprError(); - } - if (!getDerived().AlwaysRebuild() && Param == E->getParam() && - E->getUsedContext() == SemaRef.CurContext && - InitRes.get() == E->getRewrittenExpr()) + E->getUsedContext() == SemaRef.CurContext) return E; - return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param, - InitRes.get()); + return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param); } template diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 08f9f0bf50d03..2a3c6e7231785 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1824,9 +1824,6 @@ void ASTStmtReader::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { E->Param = readDeclAs(); E->UsedContext = readDeclAs(); E->CXXDefaultArgExprBits.Loc = readSourceLocation(); - 
E->CXXDefaultArgExprBits.HasRewrittenInit = Record.readInt(); - if (E->CXXDefaultArgExprBits.HasRewrittenInit) - *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { @@ -1834,9 +1831,6 @@ void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { E->Field = readDeclAs(); E->UsedContext = readDeclAs(); E->CXXDefaultInitExprBits.Loc = readSourceLocation(); - E->CXXDefaultInitExprBits.HasRewrittenInit = Record.readInt(); - if (E->CXXDefaultInitExprBits.HasRewrittenInit) - *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { @@ -3835,13 +3829,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_DEFAULT_ARG: - S = CXXDefaultArgExpr::CreateEmpty( - Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); + S = new (Context) CXXDefaultArgExpr(Empty); break; case EXPR_CXX_DEFAULT_INIT: - S = CXXDefaultInitExpr::CreateEmpty( - Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); + S = new (Context) CXXDefaultInitExpr(Empty); break; case EXPR_CXX_BIND_TEMPORARY: diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 6e4101ac122ee..e2ba69ca1eec8 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1745,9 +1745,6 @@ void ASTStmtWriter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Record.AddDeclRef(E->getParam()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); Record.AddSourceLocation(E->getUsedLocation()); - Record.push_back(E->hasRewrittenInit()); - if (E->hasRewrittenInit()) - Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_ARG; } @@ -1756,9 +1753,6 @@ void ASTStmtWriter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { Record.AddDeclRef(E->getField()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); 
Record.AddSourceLocation(E->getExprLoc()); - Record.push_back(E->hasRewrittenInit()); - if (E->hasRewrittenInit()) - Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_INIT; } diff --git a/clang/test/CXX/class/class.local/p1-0x.cpp b/clang/test/CXX/class/class.local/p1-0x.cpp index 096f5080099ec..49125f5f9b062 100644 --- a/clang/test/CXX/class/class.local/p1-0x.cpp +++ b/clang/test/CXX/class/class.local/p1-0x.cpp @@ -11,8 +11,8 @@ void f() { int x = 3; // expected-note{{'x' declared here}} struct C { int& x2 = x; // expected-error{{reference to local variable 'x' declared in enclosing lambda expression}} - }c; // expected-note {{required here}} + }; }; - C(); // expected-note {{required here}} + C(); } diff --git a/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp b/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp deleted file mode 100644 index 54a02ffc06836..0000000000000 --- a/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_cc1 -std=c++2a -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s - -consteval int immediate() { return 0;} -static int ext(); -void f(int a = immediate() + ext()); - -void test_function() { - f(); - f(0); - // CHECK: call noundef i32 @_ZL3extv() - // CHECK: add - // CHECK: call {{.*}} @_Z1fi - // CHECK: call {{.*}} @_Z1fi -} - -// CHECK: define {{.*}} i32 @_ZL3extv() - -static constexpr int not_immediate(); -struct A { - int a = immediate() + not_immediate(); -}; - -void test_member() { - // CHECK: call void @_ZN1AC2Ev - A defaulted; - // CHECK-NOT: call void @_ZN1AC2Ev - A provided{0}; -} - -// CHECK: define {{.*}} void @_ZN1AC2Ev{{.*}} -// CHECK: %call = call noundef i32 @_ZL13not_immediatev() - -int never_referenced() {return 42;}; - - -namespace not_used { - -struct A { - int a = immediate() + never_referenced(); -}; -void f(int a = immediate() + never_referenced()); - -void g() { - A a{0}; - f(0); -} - -} - -static int ext() 
{return 0;} -static constexpr int not_immediate() {return 0;} - -// CHECK-NOT: define {{.*}} i32 _ZL16never_referencedv()( -// CHECK: define {{.*}} i32 @_ZL13not_immediatev() diff --git a/clang/test/PCH/default-argument-with-immediate-calls.cpp b/clang/test/PCH/default-argument-with-immediate-calls.cpp deleted file mode 100644 index 510605a23d4e7..0000000000000 --- a/clang/test/PCH/default-argument-with-immediate-calls.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// RUN: %clang_cc1 -std=c++20 -emit-pch %s -o %t -// RUN: %clang_cc1 -std=c++20 -include-pch %t -verify %s -// expected-no-diagnostics - -#ifndef HEADER_INCLUDED -#define HEADER_INCLUDED - -consteval int immediate(); -int regular_function() { - return 0; -} - -struct S { - int a = immediate() + regular_function(); -}; - -int f(int arg = immediate()) { - return arg; -} - -#else - -consteval int immediate() { - return 0; -} - -void test() { - f(0); - f(); - S s{0}; - S t{0}; -} - -#endif diff --git a/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp b/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp deleted file mode 100644 index 511306e0d921a..0000000000000 --- a/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s -// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2b %s - -consteval int undefined(); // expected-note 4 {{declared here}} - -void check_lambdas_unused( - int a = [] - { - // The body of a lambda is not a subexpression of the lambda - // so this is immediately evaluated even if the parameter - // is never used. 
- return undefined(); // expected-error {{not a constant expression}} \ - // expected-note {{undefined function 'undefined'}} - }(), - int b = [](int no_error = undefined()) { - return no_error; - }(0), - int c = [](int defaulted = undefined()) { - return defaulted; - }() -) {} - -int check_lambdas_used( - int b = [](int no_error = undefined()) { - return no_error; - }(0), - int c = [](int defaulted = undefined()) { // expected-error {{not a constant expression}} \ - // expected-note {{declared here}} \ - // expected-note {{undefined function 'undefined'}} - return defaulted; - }(), // expected-note {{in the default initalizer of 'defaulted'}} - int d = [](int defaulted = sizeof(undefined())) { - return defaulted; - }() -) { - return 0; -} - -int test_check_lambdas_used = check_lambdas_used(); - -struct UnusedInitWithLambda { - int a = [] { - return undefined(); // expected-error {{not a constant expression}} \ - // expected-note {{undefined function 'undefined'}} - }(); - // UnusedInitWithLambda is never constructed, so the initializer - // of b and undefined() are never evaluated. 
- int b = [](int no_error = undefined()) { - return no_error; - }(); -}; - -consteval int ub(int n) { - return 0/n; // expected-note {{division}} -} - -struct InitWithLambda { - int b = [](int error = undefined()) { // expected-error {{not a constant expression}} \ - // expected-note {{declared here}} \ - // expected-note {{undefined function 'undefined'}} - return error; - }(); // expected-note {{in the default initalizer of 'error'}} - int c = [](int error = sizeof(undefined()) + ub(0)) { // expected-error {{'ub' is not a constant expression}} \ - // expected-note {{declared here}} \ - // expected-note {{in call to 'ub(0)}} - return error; - }(); // expected-note {{in the default initalizer of 'error'}} -} i; // expected-note {{in implicit default constructor}} diff --git a/clang/test/SemaCXX/source_location.cpp b/clang/test/SemaCXX/source_location.cpp index 9cfe9207dd14d..ccb385f60dc4b 100644 --- a/clang/test/SemaCXX/source_location.cpp +++ b/clang/test/SemaCXX/source_location.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fexceptions -verify %s -// RUN: %clang_cc1 -std=c++2a -fcxx-exceptions -DUSE_CONSTEVAL -fexceptions -verify %s // expected-no-diagnostics #define assert(...) ((__VA_ARGS__) ? 
((void)0) : throw 42) @@ -9,22 +8,15 @@ template struct Printer; -#ifdef USE_CONSTEVAL -#define SOURCE_LOC_EVAL_KIND consteval -#else -#define SOURCE_LOC_EVAL_KIND constexpr -#endif - namespace std { class source_location { struct __impl; public: - static SOURCE_LOC_EVAL_KIND source_location - current(const __impl *__p = __builtin_source_location()) noexcept { - source_location __loc; - __loc.__m_impl = __p; - return __loc; + static constexpr source_location current(const __impl *__p = __builtin_source_location()) noexcept { + source_location __loc; + __loc.__m_impl = __p; + return __loc; } constexpr source_location() = default; constexpr source_location(source_location const &) = default; @@ -601,51 +593,3 @@ namespace TestConstexprContext { } static_assert(test()); } - -namespace Lambda { -#line 8000 "TestLambda.cpp" -constexpr int nested_lambda(int l = []{ - return SL::current().line(); -}()) { - return l; -} -static_assert(nested_lambda() == __LINE__ - 4); - -constexpr int lambda_param(int l = [](int l = SL::current().line()) { - return l; -}()) { - return l; -} -static_assert(lambda_param() == __LINE__); - - -} - -constexpr int compound_literal_fun(int a = - (int){ SL::current().line() } -) { return a ;} -static_assert(compound_literal_fun() == __LINE__); - -struct CompoundLiteral { - int a = (int){ SL::current().line() }; -}; -static_assert(CompoundLiteral{}.a == __LINE__); - - -// FIXME -// Init captures are subexpressions of the lambda expression -// so according to the standard immediate invocations in init captures -// should be evaluated at the call site. -// However Clang does not yet implement this as it would introduce -// a fair bit of complexity. -// We intend to implement that functionality once we find real world -// use cases that require it. 
-constexpr int test_init_capture(int a = - [b = SL::current().line()] { return b; }()) { - return a; -} -#ifdef USE_CONSTEVAL -static_assert(test_init_capture() == __LINE__ - 4); -#else -static_assert(test_init_capture() == __LINE__ ); -#endif From c05f1639f7f4a8e81ad83bba99bae95553c6064e Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Fri, 4 Nov 2022 07:25:22 +0000 Subject: [PATCH 225/516] [clang][cuda/hip] Allow `__noinline__` lambdas D124866 seem to have had an unintended side effect: __noinline__ on lambdas was no longer accepted. This fixes the regression and adds a test case for it. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D137251 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/lib/Parse/ParseExprCXX.cpp | 17 ++++++++++++++++- clang/test/CodeGenCUDA/lambda-noinline.cu | 23 +++++++++++++++++++++++ clang/test/Parser/lambda-attr.cu | 9 +++++++++ 4 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenCUDA/lambda-noinline.cu diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ad1a00b4bbcc4..7bb1405c131ab 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -638,6 +638,9 @@ C++2b Feature Support CUDA/HIP Language Changes in Clang ---------------------------------- + - Allow the use of ``__noinline__`` as a keyword (instead of ``__attribute__((noinline))``) + in lambda declarations. + Objective-C Language Changes in Clang ------------------------------------- diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index e34bd8d7bca40..a768c4da504af 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1291,7 +1291,22 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( if (getLangOpts().CUDA) { // In CUDA code, GNU attributes are allowed to appear immediately after the // "[...]", even if there is no "(...)" before the lambda body. 
- MaybeParseGNUAttributes(D); + // + // Note that we support __noinline__ as a keyword in this mode and thus + // it has to be separately handled. + while (true) { + if (Tok.is(tok::kw___noinline__)) { + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + Attr.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0, + ParsedAttr::AS_Keyword); + } else if (Tok.is(tok::kw___attribute)) + ParseGNUAttributes(Attr, nullptr, &D); + else + break; + } + + D.takeAttributes(Attr); } // Helper to emit a warning if we see a CUDA host/device/global attribute diff --git a/clang/test/CodeGenCUDA/lambda-noinline.cu b/clang/test/CodeGenCUDA/lambda-noinline.cu new file mode 100644 index 0000000000000..de2196e63f074 --- /dev/null +++ b/clang/test/CodeGenCUDA/lambda-noinline.cu @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu \ +// RUN: | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// Checks noinline is correctly added to the lambda function. 
+ +// HOST: define{{.*}}@_ZZ4HostvENKUlvE_clEv({{.*}}) #[[ATTR:[0-9]+]] +// HOST: attributes #[[ATTR]]{{.*}}noinline + +// DEV: define{{.*}}@_ZZ6DevicevENKUlvE_clEv({{.*}}) #[[ATTR:[0-9]+]] +// DEV: attributes #[[ATTR]]{{.*}}noinline + +__device__ int a; +int b; + +__device__ int Device() { return ([&] __device__ __noinline__ (){ return a; })(); } + +__host__ int Host() { return ([&] __host__ __noinline__ (){ return b; })(); } diff --git a/clang/test/Parser/lambda-attr.cu b/clang/test/Parser/lambda-attr.cu index 886212b97f50b..7fa128effd512 100644 --- a/clang/test/Parser/lambda-attr.cu +++ b/clang/test/Parser/lambda-attr.cu @@ -18,6 +18,10 @@ __attribute__((device)) void device_attr() { ([&](int) __attribute__((device)){ device_fn(); })(0); // expected-warning@-1 {{nvcc does not allow '__device__' to appear after the parameter list in lambdas}} ([&] __attribute__((device)) (int) { device_fn(); })(0); + + // test that noinline can appear anywhere. + ([&] __attribute__((device)) __noinline__ () { device_fn(); })(); + ([&] __noinline__ __attribute__((device)) () { device_fn(); })(); } __attribute__((host)) __attribute__((device)) void host_device_attrs() { @@ -37,6 +41,11 @@ __attribute__((host)) __attribute__((device)) void host_device_attrs() { // expected-warning@-1 {{nvcc does not allow '__host__' to appear after the parameter list in lambdas}} // expected-warning@-2 {{nvcc does not allow '__device__' to appear after the parameter list in lambdas}} ([&] __attribute__((host)) __attribute__((device)) (int) { hd_fn(); })(0); + + // test that noinline can also appear anywhere. + ([] __attribute__((host)) __attribute__((device)) () { hd_fn(); })(); + ([] __attribute__((host)) __noinline__ __attribute__((device)) () { hd_fn(); })(); + ([] __attribute__((host)) __attribute__((device)) __noinline__ () { hd_fn(); })(); } // TODO: Add tests for __attribute__((global)) once we support global lambdas. 
From e787708bcf53e8849481f2d0267f7e8b5dbf6c8d Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Wed, 2 Nov 2022 15:23:00 -0700 Subject: [PATCH 226/516] [clang-format][NFC] Remove parsePPElIf() Differential Revision: https://reviews.llvm.org/D137308 --- clang/lib/Format/UnwrappedLineParser.cpp | 6 +----- clang/lib/Format/UnwrappedLineParser.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 77140831c2c06..18ec0844db3d4 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1108,12 +1108,10 @@ void UnwrappedLineParser::parsePPDirective() { parsePPIf(/*IfDef=*/true); break; case tok::pp_else: - parsePPElse(); - break; case tok::pp_elifdef: case tok::pp_elifndef: case tok::pp_elif: - parsePPElIf(); + parsePPElse(); break; case tok::pp_endif: parsePPEndIf(); @@ -1223,8 +1221,6 @@ void UnwrappedLineParser::parsePPElse() { ++PPBranchLevel; } -void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } - void UnwrappedLineParser::parsePPEndIf() { conditionalCompilationEnd(); parsePPUnknown(); diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index b9b106bcc89a4..34f211c9ebb35 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -119,7 +119,6 @@ class UnwrappedLineParser { void parsePPDirective(); void parsePPDefine(); void parsePPIf(bool IfDef); - void parsePPElIf(); void parsePPElse(); void parsePPEndIf(); void parsePPPragma(); From 01ec0ff2dcf5528b0e6728e5693b583e4e6f83f1 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 3 Nov 2022 14:13:08 +0100 Subject: [PATCH 227/516] [SimplifyCFG] Allow speculating block containing assume() SpeculativelyExecuteBB(), which converts a branch + phi structure into a select, currently bails out if the block contains an assume (because it is not speculatable). 
Adjust the fold to ignore ephemeral values (i.e. assumes and values only used in assumes) for cost modelling purposes, and drop them when performing the fold. Theoretically, we could try to preserve the assume information by generating an assume(br_cond || assume_cond) style assume, but this is very unlikely to be useful (because we don't do anything useful with assumes of this form) and it would make things substantially more complicated once we take operand bundle assumes into account (which don't really support a || operation). I'd prefer not to do that without good motivation. Differential Revision: https://reviews.llvm.org/D137339 --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 17 +++++++- .../CodeGen/ARM/2010-05-18-PostIndexBug.ll | 4 +- llvm/test/Transforms/SimplifyCFG/assume.ll | 42 +++++-------------- .../SimplifyCFG/two-entry-phi-fold-crash.ll | 11 ++--- 4 files changed, 30 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 80854e8ffbd26..ee7f8b2b1f7e9 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2658,6 +2658,8 @@ class EphemeralValueTracker { } return false; } + + bool contains(const Instruction *I) const { return EphValues.contains(I); } }; } // namespace @@ -2885,7 +2887,8 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, unsigned SpeculatedInstructions = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; - for (Instruction &I : drop_end(*ThenBB)) { + EphemeralValueTracker EphTracker; + for (Instruction &I : reverse(drop_end(*ThenBB))) { // Skip debug info. if (isa(I)) { SpeculatedDbgIntrinsics.push_back(&I); @@ -2904,6 +2907,10 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, continue; } + // Ignore ephemeral values, they will be dropped by the transform.
+ if (EphTracker.track(&I)) + continue; + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2979,10 +2986,16 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // be misleading while debugging. // Similarly strip attributes that maybe dependent on condition we are // hoisting above. - for (auto &I : *ThenBB) { + for (auto &I : make_early_inc_range(*ThenBB)) { if (!SpeculatedStoreValue || &I != SpeculatedStore) I.setDebugLoc(DebugLoc()); I.dropUndefImplyingAttrsAndUnknownMetadata(); + + // Drop ephemeral values. + if (EphTracker.contains(&I)) { + I.replaceAllUsesWith(PoisonValue::get(I.getType())); + I.eraseFromParent(); + } } // Hoist the instructions. diff --git a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll index 24469cc3717e9..f0b7141b5c7a2 100644 --- a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll +++ b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=armv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=THUMB ; rdar://7998649 %struct.foo = type { i64, i64 } diff --git a/llvm/test/Transforms/SimplifyCFG/assume.ll b/llvm/test/Transforms/SimplifyCFG/assume.ll index 1091f74518a2d..cd41a8040d8b7 100644 --- a/llvm/test/Transforms/SimplifyCFG/assume.ll +++ b/llvm/test/Transforms/SimplifyCFG/assume.ll @@ -22,14 +22,8 @@ define void @assume_undef_to_unreachable() { define i32 @speculate_block_with_assume_basic(i1 %c, i32 %x) { ; CHECK-LABEL: @speculate_block_with_assume_basic( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] -; CHECK: if: -; CHECK-NEXT: 
[[CMP:%.*]] = icmp ne i32 [[X:%.*]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[IF]] ] -; CHECK-NEXT: ret i32 [[PHI]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 1, i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] ; entry: br i1 %c, label %if, label %join @@ -47,15 +41,9 @@ join: define i32 @speculate_block_with_assume_extra_instr(i1 %c, i32 %x) { ; CHECK-LABEL: @speculate_block_with_assume_extra_instr( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] -; CHECK: if: ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[ADD]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD]], [[IF]] ] -; CHECK-NEXT: ret i32 [[PHI]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 [[ADD]], i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] ; entry: br i1 %c, label %if, label %join @@ -71,6 +59,8 @@ join: ret i32 %phi } +; We only allow speculating one instruction. Here %add and %add2 are used by +; the assume, but not ephemeral, because they are also used by %phi. 
define i32 @speculate_block_with_assume_extra_instrs_too_many(i1 %c, i32 %x) { ; CHECK-LABEL: @speculate_block_with_assume_extra_instrs_too_many( ; CHECK-NEXT: entry: @@ -103,16 +93,9 @@ join: define i32 @speculate_block_with_assume_extra_instrs_okay(i1 %c, i32 %x) { ; CHECK-LABEL: @speculate_block_with_assume_extra_instrs_okay( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] -; CHECK: if: ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[ADD2]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD]], [[IF]] ] -; CHECK-NEXT: ret i32 [[PHI]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 [[ADD]], i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] ; entry: br i1 %c, label %if, label %join @@ -132,13 +115,8 @@ join: define i32 @speculate_block_with_assume_operand_bundle(i1 %c, ptr %p) { ; CHECK-LABEL: @speculate_block_with_assume_operand_bundle( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] -; CHECK: if: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[P:%.*]]) ] -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[IF]] ] -; CHECK-NEXT: ret i32 [[PHI]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 1, i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] ; entry: br i1 %c, label %if, label %join diff --git a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll index cfad18938461d..2d698d1c42b99 100644 --- a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll +++ b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll @@ -8,16 +8,11 @@ define i32 @wibble(i8* %arg, i8** %arg1) { ; CHECK-NEXT: bb: ; 
CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[BORG:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[BORG]], [[BB8:%.*]] ] -; CHECK-NEXT: [[BORG3:%.*]] = phi i32 [ 8, [[BB]] ], [ [[BORG10:%.*]], [[BB8]] ] +; CHECK-NEXT: [[BORG:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[BORG]], [[BB2]] ] +; CHECK-NEXT: [[BORG3:%.*]] = phi i32 [ 8, [[BB]] ], [ [[SPEC_SELECT:%.*]], [[BB2]] ] ; CHECK-NEXT: [[BORG4:%.*]] = tail call i32 @blam(i8* [[ARG:%.*]], i32 [[BORG]]) ; CHECK-NEXT: [[BORG5:%.*]] = icmp eq i32 [[BORG4]], 0 -; CHECK-NEXT: br i1 [[BORG5]], label [[BB8]], label [[BB6:%.*]] -; CHECK: bb6: -; CHECK-NEXT: [[BORG7:%.*]] = load i8*, i8** [[ARG1:%.*]], align 4 -; CHECK-NEXT: br label [[BB8]] -; CHECK: bb8: -; CHECK-NEXT: [[BORG10]] = phi i32 [ [[BORG4]], [[BB6]] ], [ [[BORG3]], [[BB2]] ] +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[BORG5]], i32 [[BORG3]], i32 [[BORG4]] ; CHECK-NEXT: [[BORG11:%.*]] = icmp ult i32 [[BORG]], 2 ; CHECK-NEXT: br i1 [[BORG11]], label [[BB2]], label [[BB12:%.*]] ; CHECK: bb12: From 9145bf13b7df41d955efce6778994fb455d42d58 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 28 Oct 2022 23:20:28 +0000 Subject: [PATCH 228/516] [libc] Add example programs and their CMake build and instructions. These programs serve as examples for people wanting to start using the libc.
Reviewed By: michaelrj, jeffbailey Differential Revision: https://reviews.llvm.org/D137090 --- libc/examples/README.md | 80 +++++++++++++++++++++++- libc/examples/examples.cmake | 16 +++++ libc/examples/hello_world/.gitignore | 10 +++ libc/examples/hello_world/CMakeLists.txt | 8 +++ libc/examples/hello_world/hello_world.c | 14 +++++ 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 libc/examples/examples.cmake create mode 100644 libc/examples/hello_world/.gitignore create mode 100644 libc/examples/hello_world/CMakeLists.txt create mode 100644 libc/examples/hello_world/hello_world.c diff --git a/libc/examples/README.md b/libc/examples/README.md index ecdbe33656939..5d6cb9489ae66 100644 --- a/libc/examples/README.md +++ b/libc/examples/README.md @@ -1 +1,79 @@ -Coming soon, stay tuned! +Examples +======== +This directory contains a few example programs which illustrate how one can set +up their own projects to use LLVM's libc, either as an overlay or as the only +libc in their projects. See the +[the usage mode document](https://libc.llvm.org/usage_modes.html) for more +information about the different modes in which one can build and use the libc. + +Building the Examples +===================== +Each example has its own directory which contain the source code and the CMake +build set up. To build an example, create a directory named `build` in the +example's directory: + +```bash +$> cd +$> mkdir build +$> cd build +``` + +Each example can be built to use the libc in either +[the overlay mode](https://libc.llvm.org/overlay_mode.html) or the +[full build mode](https://libc.llvm.org/fullbuild_mode.html). The CMake +configure step differs slightly depending on the mode you want to use the libc +in. + +Building against an overlay libc +-------------------------------- + +Before you can link an example against the overlay libc, you will have to +install it. 
See [the documentation of the overlay mode](https://libc.llvm.org/overlay_mode.html) +to learn how to install the libc's overlay static archive named `libllvmlibc.a`. +Once installed, to build an example against it, you have specify the directory +in which the static archive is installed with the option +`LIBC_OVERLAY_ARCHIVE_DIR`: + +```bash +$> cmake ../ -G \ + -DLIBC_OVERLAY_ARCHIVE_DIR= +``` + +Next, if `Ninja` is used for ``, you can build the example as follows: + +```bash +$> ninja +``` + +Building against a full libc +---------------------------- + +Before you can link an example against the full libc, you will have to first +install it. See [the documentation of the full build mode](https://libc.llvm.org/fullbuild_mode.html) +to learn how to install a full libc along with the other LLVM toolchain pieces +like `clang`, `lld` and `compiler-rt`. The CMake build for the examples will +assume that you have all of these components installed in a special sysroot +(see decription of the `--sysroot` option +[here](https://gcc.gnu.org/onlinedocs/gcc/Directory-Options.html).) Once you +have installed them, you have to inform CMake that we are linking against the +full libc as follows: + +```bash +$> cmake ../ -G -DLIBC_FULLBUILD=ON \ + -DCMAKE_SYSROOT= \ + -DCMAKE_C_COMPILER=/bin/clang \ + -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY +``` + +`` is the path to the sysroot directory you have set up while +installing the full libc. The option +`-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY` tells CMake to not attempt +linking full executables against shared libraries. We have to use this as LLVM's +libc does not yet have support for shared libraries and dynamic linking. 
After +the above `cmake` command, assuming `Ninja` was used for ``, you can build +the example as follows: + + +```bash +$> ninja +``` diff --git a/libc/examples/examples.cmake b/libc/examples/examples.cmake new file mode 100644 index 0000000000000..81e99e3cbede9 --- /dev/null +++ b/libc/examples/examples.cmake @@ -0,0 +1,16 @@ +function(add_example name) + add_executable( + ${name} + ${ARGN} + ) + + if(LIBC_FULLBUILD) + target_link_options(${name} PRIVATE -static -rtlib=compiler-rt -fuse-ld=lld) + elseif(LIBC_OVERLAY_ARCHIVE_DIR) + target_link_directories(${name} PRIVATE ${LIBC_OVERLAY_ARCHIVE_DIR}) + target_link_options(${name} PRIVATE -l:libllvmlibc.a) + else() + message(FATAL_ERROR "Either LIBC_FULLBUILD should be on or " + "LIBC_OVERLAY_ARCHIVE_DIR should be set.") + endif() +endfunction() diff --git a/libc/examples/hello_world/.gitignore b/libc/examples/hello_world/.gitignore new file mode 100644 index 0000000000000..0bda4771f3297 --- /dev/null +++ b/libc/examples/hello_world/.gitignore @@ -0,0 +1,10 @@ +#==============================================================================# +# This file specifies intentionally untracked files that git should ignore. +# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html +# +# This file is intentionally different from the output of `git svn show-ignore`, +# as most of those are useless. 
+#==============================================================================# + +# Nested build directory +/build* diff --git a/libc/examples/hello_world/CMakeLists.txt b/libc/examples/hello_world/CMakeLists.txt new file mode 100644 index 0000000000000..89bf35c0340d0 --- /dev/null +++ b/libc/examples/hello_world/CMakeLists.txt @@ -0,0 +1,8 @@ +project(hello_world) +cmake_minimum_required(VERSION 3.13.4) +include(../examples.cmake) + +add_example( + hello_world + hello_world.c +) diff --git a/libc/examples/hello_world/hello_world.c b/libc/examples/hello_world/hello_world.c new file mode 100644 index 0000000000000..d065bdbb5a5bf --- /dev/null +++ b/libc/examples/hello_world/hello_world.c @@ -0,0 +1,14 @@ +//===-- libc example - hello world ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +int main() { + printf("Hello, World\n"); + return 0; +} From 3d2165f7381644ff2b26523d36ff250949e9a049 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 4 Nov 2022 08:33:28 +0000 Subject: [PATCH 229/516] [libc][NFC] Remove "$>" from recipe commands blocks in examples/README.md. The blocks come with a copy button so removing the "$>" makes it easy to copy paste the commands. 
--- libc/examples/README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libc/examples/README.md b/libc/examples/README.md index 5d6cb9489ae66..ea82c5caf923c 100644 --- a/libc/examples/README.md +++ b/libc/examples/README.md @@ -35,14 +35,14 @@ in which the static archive is installed with the option `LIBC_OVERLAY_ARCHIVE_DIR`: ```bash -$> cmake ../ -G \ - -DLIBC_OVERLAY_ARCHIVE_DIR= +cmake ../ -G \ + -DLIBC_OVERLAY_ARCHIVE_DIR= ``` Next, if `Ninja` is used for ``, you can build the example as follows: ```bash -$> ninja +ninja ``` Building against a full libc @@ -59,10 +59,10 @@ have installed them, you have to inform CMake that we are linking against the full libc as follows: ```bash -$> cmake ../ -G -DLIBC_FULLBUILD=ON \ - -DCMAKE_SYSROOT= \ - -DCMAKE_C_COMPILER=/bin/clang \ - -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY +cmake ../ -G -DLIBC_FULLBUILD=ON \ + -DCMAKE_SYSROOT= \ + -DCMAKE_C_COMPILER=/bin/clang \ + -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY ``` `` is the path to the sysroot directory you have set up while @@ -75,5 +75,5 @@ the example as follows: ```bash -$> ninja +ninja ``` From 086757a47a51c626d85fab4ef2a1eb909a4db108 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 4 Nov 2022 08:37:05 +0000 Subject: [PATCH 230/516] [libc][NFC] Remove "$>" from another command block on examples/README.md. --- libc/examples/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/examples/README.md b/libc/examples/README.md index ea82c5caf923c..36b886090c6c1 100644 --- a/libc/examples/README.md +++ b/libc/examples/README.md @@ -13,9 +13,9 @@ build set up. 
To build an example, create a directory named `build` in the example's directory: ```bash -$> cd -$> mkdir build -$> cd build +cd +mkdir build +cd build ``` Each example can be built to use the libc in either @@ -59,7 +59,7 @@ have installed them, you have to inform CMake that we are linking against the full libc as follows: ```bash -cmake ../ -G -DLIBC_FULLBUILD=ON \ +cmake ../ -G -DLIBC_FULLBUILD=ON \ -DCMAKE_SYSROOT= \ -DCMAKE_C_COMPILER=/bin/clang \ -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY From beed1de64d56a95989e9e951b496e228f1d48000 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 11:49:22 +0100 Subject: [PATCH 231/516] [X86] Use default attributes for more intrinsics Another followup to D136939, adding default attributes to the remaining readnone intrinsics. This also covers some non-readnone intrinsics, because they were interleaved, and it seemed to make more sense to update them at the same time. Differential Revision: https://reviews.llvm.org/D137250 --- llvm/include/llvm/IR/IntrinsicsX86.td | 2107 +++++++++++++------------ 1 file changed, 1114 insertions(+), 993 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 4a836e9e917c4..9ce993446d07e 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4370,638 +4370,646 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { def int_x86_avx512_conflict_d_128 : ClangBuiltin<"__builtin_ia32_vpconflictsi_128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_conflict_d_256 : ClangBuiltin<"__builtin_ia32_vpconflictsi_256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx512_conflict_d_512 : ClangBuiltin<"__builtin_ia32_vpconflictsi_512">, - Intrinsic<[llvm_v16i32_ty], 
[llvm_v16i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], + [IntrNoMem]>; def int_x86_avx512_conflict_q_128 : ClangBuiltin<"__builtin_ia32_vpconflictdi_128">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_conflict_q_256 : ClangBuiltin<"__builtin_ia32_vpconflictdi_256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_conflict_q_512 : ClangBuiltin<"__builtin_ia32_vpconflictdi_512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>; } // Compares let TargetPrefix = "x86" in { // 512-bit def int_x86_avx512_vcomi_sd : ClangBuiltin<"__builtin_ia32_vcomisd">, - Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_vcomi_ss : ClangBuiltin<"__builtin_ia32_vcomiss">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; } // Compress, Expand let TargetPrefix = "x86" in { def int_x86_avx512_mask_compress : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrNoMem]>; def int_x86_avx512_mask_expand : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - 
[IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrNoMem]>; } // truncate let TargetPrefix = "x86" in { def int_x86_avx512_mask_pmov_qb_128 : - ClangBuiltin<"__builtin_ia32_pmovqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_128 : - ClangBuiltin<"__builtin_ia32_pmovsqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_128 : - ClangBuiltin<"__builtin_ia32_pmovusqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, 
llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qb_256 : - ClangBuiltin<"__builtin_ia32_pmovqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_256 : - ClangBuiltin<"__builtin_ia32_pmovsqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_256 : - ClangBuiltin<"__builtin_ia32_pmovusqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - 
[IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qb_512 : - ClangBuiltin<"__builtin_ia32_pmovqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_512 : - ClangBuiltin<"__builtin_ia32_pmovsqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_512 : - ClangBuiltin<"__builtin_ia32_pmovusqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + 
ClangBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_128 : - ClangBuiltin<"__builtin_ia32_pmovqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_128 : - ClangBuiltin<"__builtin_ia32_pmovsqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_128 : - ClangBuiltin<"__builtin_ia32_pmovusqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + 
ClangBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_256 : - ClangBuiltin<"__builtin_ia32_pmovqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_256 : - ClangBuiltin<"__builtin_ia32_pmovsqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_256 : - ClangBuiltin<"__builtin_ia32_pmovusqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + 
ClangBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_512 : - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_512 : - ClangBuiltin<"__builtin_ia32_pmovsqw512_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqw512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_512 : - ClangBuiltin<"__builtin_ia32_pmovusqw512_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqw512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + 
[IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_128 : - ClangBuiltin<"__builtin_ia32_pmovqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqd128_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qd_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqd128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_128 : - ClangBuiltin<"__builtin_ia32_pmovsqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqd128_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_128 : - ClangBuiltin<"__builtin_ia32_pmovusqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqd128_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def 
int_x86_avx512_mask_pmov_qd_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqd256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_256 : - ClangBuiltin<"__builtin_ia32_pmovsqd256_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqd256_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_256 : - ClangBuiltin<"__builtin_ia32_pmovusqd256_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqd256_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqd512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_512 : - 
ClangBuiltin<"__builtin_ia32_pmovsqd512_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqd512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_512 : - ClangBuiltin<"__builtin_ia32_pmovusqd512_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqd512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_128 : - ClangBuiltin<"__builtin_ia32_pmovdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_128 : - 
ClangBuiltin<"__builtin_ia32_pmovsdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_128 : - ClangBuiltin<"__builtin_ia32_pmovusdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_256 : - ClangBuiltin<"__builtin_ia32_pmovdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_256 : - 
ClangBuiltin<"__builtin_ia32_pmovsdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_256 : - ClangBuiltin<"__builtin_ia32_pmovusdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_512 : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_512 : - ClangBuiltin<"__builtin_ia32_pmovsdb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, 
llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_512 : - ClangBuiltin<"__builtin_ia32_pmovusdb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_128 : - ClangBuiltin<"__builtin_ia32_pmovdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_128 : - ClangBuiltin<"__builtin_ia32_pmovsdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - 
[IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_128 : - ClangBuiltin<"__builtin_ia32_pmovusdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_256 : - ClangBuiltin<"__builtin_ia32_pmovdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_256 : - ClangBuiltin<"__builtin_ia32_pmovsdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + 
ClangBuiltin<"__builtin_ia32_pmovsdw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_256 : - ClangBuiltin<"__builtin_ia32_pmovusdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_512 : - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_512 : - ClangBuiltin<"__builtin_ia32_pmovsdw512_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdw512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, 
llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_512 : - ClangBuiltin<"__builtin_ia32_pmovusdw512_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdw512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_128 : - ClangBuiltin<"__builtin_ia32_pmovwb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovwb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_wb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovwb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovwb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_128 : - ClangBuiltin<"__builtin_ia32_pmovswb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovswb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, 
llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovswb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovswb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_128 : - ClangBuiltin<"__builtin_ia32_pmovuswb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovuswb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovwb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovwb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_256 : - ClangBuiltin<"__builtin_ia32_pmovswb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovswb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovswb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovswb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + 
[IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_256 : - ClangBuiltin<"__builtin_ia32_pmovuswb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovuswb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovwb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovwb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_512 : - ClangBuiltin<"__builtin_ia32_pmovswb512_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovswb512_mask">, + DefaultAttrsIntrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovswb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovswb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_512 : - ClangBuiltin<"__builtin_ia32_pmovuswb512_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovuswb512_mask">, + DefaultAttrsIntrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, 
llvm_i32_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrArgMemOnly]>; } // Bitwise ternary logic let TargetPrefix = "x86" in { def int_x86_avx512_pternlog_d_128 : - ClangBuiltin<"__builtin_ia32_pternlogd128">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogd128">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_256 : - ClangBuiltin<"__builtin_ia32_pternlogd256">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogd256">, + DefaultAttrsIntrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_512 : - ClangBuiltin<"__builtin_ia32_pternlogd512">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogd512">, + DefaultAttrsIntrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_128 : - ClangBuiltin<"__builtin_ia32_pternlogq128">, - Intrinsic<[llvm_v2i64_ty], - [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogq128">, + DefaultAttrsIntrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_256 : - 
ClangBuiltin<"__builtin_ia32_pternlogq256">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogq256">, + DefaultAttrsIntrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_512 : - ClangBuiltin<"__builtin_ia32_pternlogq512">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogq512">, + DefaultAttrsIntrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; } // vp2intersect let TargetPrefix = "x86" in { def int_x86_avx512_vp2intersect_q_512 : - Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], - [llvm_v8i64_ty, llvm_v8i64_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], + [llvm_v8i64_ty, llvm_v8i64_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_q_256 : - Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], - [llvm_v4i64_ty, llvm_v4i64_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_q_128 : - Intrinsic<[llvm_v2i1_ty, llvm_v2i1_ty], - [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v2i1_ty, llvm_v2i1_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_d_512 : - Intrinsic<[llvm_v16i1_ty, llvm_v16i1_ty], - [llvm_v16i32_ty, llvm_v16i32_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i1_ty, llvm_v16i1_ty], + [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_d_256 : - Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], - [llvm_v8i32_ty, llvm_v8i32_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_d_128 : - 
Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], - [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], + [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; } // Misc. @@ -5009,57 +5017,70 @@ let TargetPrefix = "x86" in { // NOTE: These comparison intrinsics are not used by clang as long as the // distinction in signaling behaviour is not implemented. def int_x86_avx512_mask_cmp_ps_512 : - Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_mask_cmp_pd_512 : - Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_mask_cmp_ps_256 : - Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i32_ty, llvm_v8i1_ty], [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i32_ty, llvm_v8i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_pd_256 : - Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i32_ty, llvm_v4i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_ps_128 : - Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_v4i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_pd_128 : - Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty, llvm_v2i1_ty], 
[IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty, llvm_v2i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_ss : - ClangBuiltin<"__builtin_ia32_cmpss_mask">, - Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_cmpss_mask">, + DefaultAttrsIntrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_mask_cmp_sd : - ClangBuiltin<"__builtin_ia32_cmpsd_mask">, - Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_cmpsd_mask">, + DefaultAttrsIntrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; } //===----------------------------------------------------------------------===// // SHA intrinsics let TargetPrefix = "x86" in { def int_x86_sha1rnds4 : ClangBuiltin<"__builtin_ia32_sha1rnds4">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_sha1nexte : ClangBuiltin<"__builtin_ia32_sha1nexte">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sha1msg1 : ClangBuiltin<"__builtin_ia32_sha1msg1">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sha1msg2 : ClangBuiltin<"__builtin_ia32_sha1msg2">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], 
[llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sha256rnds2 : ClangBuiltin<"__builtin_ia32_sha256rnds2">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha256msg1 : ClangBuiltin<"__builtin_ia32_sha256msg1">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sha256msg2 : ClangBuiltin<"__builtin_ia32_sha256msg2">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -5144,40 +5165,43 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { def int_x86_avx512bf16_cvtne2ps2bf16_128: - ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; def int_x86_avx512bf16_cvtne2ps2bf16_256: - ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">, - Intrinsic<[llvm_v16bf16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; def int_x86_avx512bf16_cvtne2ps2bf16_512: - ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">, - Intrinsic<[llvm_v32bf16_ty], [llvm_v16f32_ty, llvm_v16f32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">, + DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v16f32_ty, llvm_v16f32_ty], + [IntrNoMem]>; // Intrinsic must be masked due to it producing less than 128 bits of results. 
def int_x86_avx512bf16_mask_cvtneps2bf16_128: - Intrinsic<[llvm_v8bf16_ty], - [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v4i1_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v4i1_ty], + [IntrNoMem]>; def int_x86_avx512bf16_cvtneps2bf16_256: - ClangBuiltin<"__builtin_ia32_cvtneps2bf16_256">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtneps2bf16_256">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_cvtneps2bf16_512: - ClangBuiltin<"__builtin_ia32_cvtneps2bf16_512">, - Intrinsic<[llvm_v16bf16_ty], [llvm_v16f32_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtneps2bf16_512">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_128: - ClangBuiltin<"__builtin_ia32_dpbf16ps_128">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_dpbf16ps_128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_256: - ClangBuiltin<"__builtin_ia32_dpbf16ps_256">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_dpbf16ps_256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_512: - ClangBuiltin<"__builtin_ia32_dpbf16ps_512">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_dpbf16ps_512">, + DefaultAttrsIntrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -5354,9 +5378,9 @@ let TargetPrefix = "x86" in { llvm_x86amx_ty, llvm_x86amx_ty, 
llvm_x86amx_ty], []>; def int_x86_cast_vector_to_tile: - Intrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>; def int_x86_cast_tile_to_vector: - Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; } let TargetPrefix = "x86" in { @@ -5375,38 +5399,38 @@ def int_x86_cmpccxadd64 //===----------------------------------------------------------------------===// let TargetPrefix = "x86" in { // AMX_FP16 - Intel FP16 AMX extensions - def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">, - Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], - [ImmArg>, - ImmArg>, ImmArg>]>; +def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">, + DefaultAttrsIntrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg>, ImmArg>, + ImmArg>]>; def int_x86_vbcstnebf162ps128 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vbcstnebf162ps256 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vbcstnesh2ps128 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vbcstnesh2ps256 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneebf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def 
int_x86_vcvtneebf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneeph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneeph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneobf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneobf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneoph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneoph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_vcvtneps2bf16128 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [ IntrNoMem ]>; def int_x86_vcvtneps2bf16256 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [ IntrNoMem ]>; } //===----------------------------------------------------------------------===// // 
RAO-INT intrinsics @@ -5448,751 +5472,848 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { def int_x86_avx512fp16_add_ph_512 : ClangBuiltin<"__builtin_ia32_addph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_sub_ph_512 : ClangBuiltin<"__builtin_ia32_subph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mul_ph_512 : ClangBuiltin<"__builtin_ia32_mulph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_div_ph_512 : ClangBuiltin<"__builtin_ia32_divph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_max_ph_128 : ClangBuiltin<"__builtin_ia32_maxph128">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_max_ph_256 : ClangBuiltin<"__builtin_ia32_maxph256">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_max_ph_512 : ClangBuiltin<"__builtin_ia32_maxph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, 
llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_min_ph_128 : ClangBuiltin<"__builtin_ia32_minph128">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_min_ph_256 : ClangBuiltin<"__builtin_ia32_minph256">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_min_ph_512 : ClangBuiltin<"__builtin_ia32_minph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_ph_512 - : Intrinsic<[ llvm_v32i1_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_v32i1_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32i1_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_v32i1_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_ph_256 - : Intrinsic<[ llvm_v16i1_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, llvm_v16i1_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v16i1_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, + llvm_v16i1_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_ph_128 - : Intrinsic<[ llvm_v8i1_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8i1_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v8i1_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_v8i1_ty ], + [ IntrNoMem, ImmArg> ]>; def 
int_x86_avx512fp16_mask_add_sh_round : ClangBuiltin<"__builtin_ia32_addsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_sub_sh_round : ClangBuiltin<"__builtin_ia32_subsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_mul_sh_round : ClangBuiltin<"__builtin_ia32_mulsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_div_sh_round : ClangBuiltin<"__builtin_ia32_divsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_min_sh_round : ClangBuiltin<"__builtin_ia32_minsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_max_sh_round : ClangBuiltin<"__builtin_ia32_maxsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], 
- [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_sh : ClangBuiltin<"__builtin_ia32_cmpsh_mask">, - Intrinsic<[ llvm_i8_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i8_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_vcomi_sh : ClangBuiltin<"__builtin_ia32_vcomish">, - Intrinsic<[ llvm_i32_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2psx_128 : ClangBuiltin<"__builtin_ia32_vcvtph2psx128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2psx_256 : ClangBuiltin<"__builtin_ia32_vcvtph2psx256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2psx_512 : ClangBuiltin<"__builtin_ia32_vcvtph2psx512_mask">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtps2phx_128 : 
ClangBuiltin<"__builtin_ia32_vcvtps2phx128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtps2phx_256 : ClangBuiltin<"__builtin_ia32_vcvtps2phx256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtps2phx_512 : ClangBuiltin<"__builtin_ia32_vcvtps2phx512_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_512 : ClangBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2pd_128 : ClangBuiltin<"__builtin_ia32_vcvtph2pd128_mask">, - Intrinsic<[ llvm_v2f64_ty ], - [ llvm_v8f16_ty, 
llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2f64_ty ], + [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2pd_256 : ClangBuiltin<"__builtin_ia32_vcvtph2pd256_mask">, - Intrinsic<[ llvm_v4f64_ty ], - [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f64_ty ], + [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2pd_512 : ClangBuiltin<"__builtin_ia32_vcvtph2pd512_mask">, - Intrinsic<[ llvm_v8f64_ty ], - [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f64_ty ], + [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsh2ss_round : ClangBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtss2sh_round : ClangBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsd2sh_round : ClangBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsh2sd_round : 
ClangBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">, - Intrinsic<[ llvm_v2f64_ty ], - [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v2f64_ty ], + [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2w_128 : ClangBuiltin<"__builtin_ia32_vcvtph2w128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2w_256 : ClangBuiltin<"__builtin_ia32_vcvtph2w256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2w_512 : ClangBuiltin<"__builtin_ia32_vcvtph2w512_mask">, - Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2w_128 : ClangBuiltin<"__builtin_ia32_vcvttph2w128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2w_256 : ClangBuiltin<"__builtin_ia32_vcvttph2w256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2w_512 : ClangBuiltin<"__builtin_ia32_vcvttph2w512_mask">, - 
Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2uw_128 : ClangBuiltin<"__builtin_ia32_vcvtph2uw128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uw_256 : ClangBuiltin<"__builtin_ia32_vcvtph2uw256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uw_512 : ClangBuiltin<"__builtin_ia32_vcvtph2uw512_mask">, - Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2uw_128 : ClangBuiltin<"__builtin_ia32_vcvttph2uw128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uw_256 : ClangBuiltin<"__builtin_ia32_vcvttph2uw256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uw_512 : ClangBuiltin<"__builtin_ia32_vcvttph2uw512_mask">, - Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, 
llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2dq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2dq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2dq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2dq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2dq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2dq512_mask">, - Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2udq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2udq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2udq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2udq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2udq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2udq512_mask">, - Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ 
llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtdq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtudq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2dq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2dq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2dq512_mask">, - Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2udq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2udq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def 
int_x86_avx512fp16_mask_vcvttph2udq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2udq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2udq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2udq512_mask">, - Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtqq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtqq2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2qq128_mask">, - Intrinsic<[ llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2qq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2qq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, 
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtuqq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtuqq2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">, - Intrinsic<[ llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2qq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2qq128_mask">, - Intrinsic<[ llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ 
llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2qq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2qq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2qq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2qq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">, - Intrinsic<[ llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2si32 : ClangBuiltin<"__builtin_ia32_vcvtsh2si32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2usi32 : 
ClangBuiltin<"__builtin_ia32_vcvtsh2usi32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2si64 : ClangBuiltin<"__builtin_ia32_vcvtsh2si64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2usi64 : ClangBuiltin<"__builtin_ia32_vcvtsh2usi64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtusi2sh : ClangBuiltin<"__builtin_ia32_vcvtusi2sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtusi642sh : ClangBuiltin<"__builtin_ia32_vcvtusi642sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsi2sh : ClangBuiltin<"__builtin_ia32_vcvtsi2sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsi642sh : ClangBuiltin<"__builtin_ia32_vcvtsi642sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def 
int_x86_avx512fp16_vcvttsh2si32 : ClangBuiltin<"__builtin_ia32_vcvttsh2si32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2si64 : ClangBuiltin<"__builtin_ia32_vcvttsh2si64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2usi32 : ClangBuiltin<"__builtin_ia32_vcvttsh2usi32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2usi64 : ClangBuiltin<"__builtin_ia32_vcvttsh2usi64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_sqrt_ph_512 - : Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_sqrt_sh - : Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rsqrt_ph_128 : ClangBuiltin<"__builtin_ia32_rsqrtph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_ph_256 : 
ClangBuiltin<"__builtin_ia32_rsqrtph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_ph_512 : ClangBuiltin<"__builtin_ia32_rsqrtph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_sh : ClangBuiltin<"__builtin_ia32_rsqrtsh_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_128 : ClangBuiltin<"__builtin_ia32_rcpph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_256 : ClangBuiltin<"__builtin_ia32_rcpph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_512 : ClangBuiltin<"__builtin_ia32_rcpph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_sh : ClangBuiltin<"__builtin_ia32_rcpsh_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ 
llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_reduce_ph_128 : ClangBuiltin<"__builtin_ia32_reduceph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_ph_256 : ClangBuiltin<"__builtin_ia32_reduceph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_ph_512 : ClangBuiltin<"__builtin_ia32_reduceph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_sh : ClangBuiltin<"__builtin_ia32_reducesh_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_fpclass_ph_128 - : Intrinsic<[ llvm_v8i1_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v8i1_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_fpclass_ph_256 - : Intrinsic<[ llvm_v16i1_ty ], [ llvm_v16f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : 
DefaultAttrsIntrinsic<[ llvm_v16i1_ty ], + [ llvm_v16f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_fpclass_ph_512 - : Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32i1_ty ], + [ llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_fpclass_sh : ClangBuiltin<"__builtin_ia32_fpclasssh_mask">, - Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i8_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getexp_ph_128 : ClangBuiltin<"__builtin_ia32_getexpph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_getexp_ph_256 : ClangBuiltin<"__builtin_ia32_getexpph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_getexp_ph_512 : ClangBuiltin<"__builtin_ia32_getexpph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getexp_sh : ClangBuiltin<"__builtin_ia32_getexpsh128_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def 
int_x86_avx512fp16_mask_getmant_ph_128 : ClangBuiltin<"__builtin_ia32_getmantph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_256 : ClangBuiltin<"__builtin_ia32_getmantph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_512 : ClangBuiltin<"__builtin_ia32_getmantph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_sh : ClangBuiltin<"__builtin_ia32_getmantsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, - llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_128 : ClangBuiltin<"__builtin_ia32_rndscaleph_128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_256 : ClangBuiltin<"__builtin_ia32_rndscaleph_256_mask">, - Intrinsic<[ 
llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_512 : ClangBuiltin<"__builtin_ia32_rndscaleph_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_sh : ClangBuiltin<"__builtin_ia32_rndscalesh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_scalef_ph_128 : ClangBuiltin<"__builtin_ia32_scalefph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_scalef_ph_256 : ClangBuiltin<"__builtin_ia32_scalefph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_scalef_ph_512 : ClangBuiltin<"__builtin_ia32_scalefph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + 
DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_scalef_sh : ClangBuiltin<"__builtin_ia32_scalefsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vfmadd_ph_512 - : Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vfmaddsub_ph_128 : ClangBuiltin<"__builtin_ia32_vfmaddsubph">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_vfmaddsub_ph_256 : ClangBuiltin<"__builtin_ia32_vfmaddsubph256">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, + llvm_v16f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_vfmaddsub_ph_512 - : Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vfmadd_f16 - : Intrinsic<[ llvm_half_ty ], - [ llvm_half_ty, llvm_half_ty, llvm_half_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_half_ty ], + [ llvm_half_ty, llvm_half_ty, llvm_half_ty, + 
llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ 
IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfmaddcph128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfmaddcph128_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfmaddcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfmaddcph256_maskz">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfmaddcph512_mask3">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfmaddcph512_maskz">, 
- Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmadd_csh : ClangBuiltin<"__builtin_ia32_vfmaddcsh_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfmadd_csh : ClangBuiltin<"__builtin_ia32_vfmaddcsh_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmadd_csh : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfcmadd_csh : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmul_cph_128 : ClangBuiltin<"__builtin_ia32_vfmulcph128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + 
DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmul_cph_128 : ClangBuiltin<"__builtin_ia32_vfcmulcph128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmul_cph_256 : ClangBuiltin<"__builtin_ia32_vfmulcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmul_cph_256 : ClangBuiltin<"__builtin_ia32_vfcmulcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmul_cph_512 : ClangBuiltin<"__builtin_ia32_vfmulcph512_mask">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmul_cph_512 : ClangBuiltin<"__builtin_ia32_vfcmulcph512_mask">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmul_csh : ClangBuiltin<"__builtin_ia32_vfmulcsh_mask">, - Intrinsic<[ 
llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmul_csh : ClangBuiltin<"__builtin_ia32_vfcmulcsh_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; } From 50621169ae1efb4998cc64dcd9d5bab6941486de Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Fri, 4 Nov 2022 08:40:18 +0000 Subject: [PATCH 232/516] [AArch64][SVE] Extend PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)) instcombine Extend above instcombine added in D134946 to cover more flag-setting instructions. Reviewed By: peterwaller-arm Differential Revision: https://reviews.llvm.org/D136438 --- .../AArch64/AArch64TargetTransformInfo.cpp | 39 ++-- .../AArch64/sve-intrinsics-ptest.ll | 168 +++++++++++++++--- 2 files changed, 170 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index cbb3d793899a9..3f98d1f00532a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -975,20 +975,22 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) { static Optional instCombineSVEPTest(InstCombiner &IC, IntrinsicInst &II) { - IntrinsicInst *Op1 = dyn_cast(II.getArgOperand(0)); - IntrinsicInst *Op2 = dyn_cast(II.getArgOperand(1)); + IntrinsicInst *Pg = dyn_cast(II.getArgOperand(0)); + IntrinsicInst *Op = dyn_cast(II.getArgOperand(1)); - if (!Op1 || !Op2) + if (!Pg || !Op) return None; + Intrinsic::ID OpIID = Op->getIntrinsicID(); + 
IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); - if (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && - Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && - Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { - Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; - Type *Tys[] = {Op1->getArgOperand(0)->getType()}; + if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && + OpIID == Intrinsic::aarch64_sve_convert_to_svbool && + Pg->getArgOperand(0)->getType() == Op->getArgOperand(0)->getType()) { + Value *Ops[] = {Pg->getArgOperand(0), Op->getArgOperand(0)}; + Type *Tys[] = {Pg->getArgOperand(0)->getType()}; auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops); @@ -999,12 +1001,21 @@ static Optional instCombineSVEPTest(InstCombiner &IC, // Transform PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)). // Later optimizations may rewrite sequence to use the flag-setting variant // of instruction X to remove PTEST. 
- if ((Op1 == Op2) && - (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) && - ((Op1->getIntrinsicID() == Intrinsic::aarch64_sve_brkb_z) || - (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_rdffr_z))) { - Value *Ops[] = {Op1->getArgOperand(0), Op1}; - Type *Tys[] = {Op1->getType()}; + if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) && + ((OpIID == Intrinsic::aarch64_sve_brka_z) || + (OpIID == Intrinsic::aarch64_sve_brkb_z) || + (OpIID == Intrinsic::aarch64_sve_brkpa_z) || + (OpIID == Intrinsic::aarch64_sve_brkpb_z) || + (OpIID == Intrinsic::aarch64_sve_rdffr_z) || + (OpIID == Intrinsic::aarch64_sve_and_z) || + (OpIID == Intrinsic::aarch64_sve_bic_z) || + (OpIID == Intrinsic::aarch64_sve_eor_z) || + (OpIID == Intrinsic::aarch64_sve_nand_z) || + (OpIID == Intrinsic::aarch64_sve_nor_z) || + (OpIID == Intrinsic::aarch64_sve_orn_z) || + (OpIID == Intrinsic::aarch64_sve_orr_z))) { + Value *Ops[] = {Pg->getArgOperand(0), Pg}; + Type *Tys[] = {Pg->getType()}; auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops); PTest->takeName(&II); diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll index c29e0e0a38f65..c6ef477a4341d 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll @@ -32,29 +32,6 @@ define i1 @ptest_any2( %a) #0 { ret i1 %out } -; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)). 
-define i1 @ptest_any_brkb_z( %pg, %a) { -; CHECK-LABEL: @ptest_any_brkb_z( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( [[PG:%.*]], [[A:%.*]]) -; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) -; CHECK-NEXT: ret i1 [[OUT]] -; - %1 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( %pg, %a) - %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) - ret i1 %out -} - -define i1 @ptest_any_rdffr_z( %pg) { -; CHECK-LABEL: @ptest_any_rdffr_z( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.rdffr.z( [[PG:%.*]]) -; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) -; CHECK-NEXT: ret i1 [[OUT]] -; - %1 = tail call @llvm.aarch64.sve.rdffr.z( %pg) - %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) - ret i1 %out -} - define i1 @ptest_first( %a) #0 { ; CHECK-LABEL: @ptest_first( ; CHECK-NEXT: [[MASK:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 0) @@ -91,6 +68,140 @@ define i1 @ptest_last( %a) #0 { ret i1 %out } +; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)). 
+ +define i1 @ptest_any_brka_z( %pg, %a) { +; CHECK-LABEL: @ptest_any_brka_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brka.z.nxv16i1( [[PG:%.*]], [[A:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brka.z.nxv16i1( %pg, %a) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_brkpa_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_brkpa_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkpa.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brkpa.z.nxv16i1( %pg, %a, %b) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_brkb_z( %pg, %a) { +; CHECK-LABEL: @ptest_any_brkb_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( [[PG:%.*]], [[A:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( %pg, %a) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_brkpb_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_brkpb_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkpb.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brkpb.z.nxv16i1( %pg, %a, %b) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_rdffr_z( %pg) { +; CHECK-LABEL: @ptest_any_rdffr_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.rdffr.z( [[PG:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 
@llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.rdffr.z( %pg) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_and_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_and_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.and.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_bic_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_bic_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.bic.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_eor_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_eor_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.eor.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_nand_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_nand_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.nand.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.nand.z.nxv16i1( %pg, %a, %b) + %2 = call i1 
@llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_nor_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_nor_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.nor.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.nor.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_orn_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_orn_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orn.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.orn.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_orr_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_orr_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.orr.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) @@ -104,7 +215,18 @@ declare @llvm.aarch64.sve.convert.to.svbool.nxv8i1( @llvm.aarch64.sve.convert.to.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.to.svbool.nxv2i1() +declare @llvm.aarch64.sve.brka.z.nxv16i1(, ) declare @llvm.aarch64.sve.brkb.z.nxv16i1(, ) +declare @llvm.aarch64.sve.brkpa.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.brkpb.z.nxv16i1(, , ) declare @llvm.aarch64.sve.rdffr.z() +declare @llvm.aarch64.sve.and.z.nxv16i1(, , ) 
+declare @llvm.aarch64.sve.bic.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.eor.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.nand.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.nor.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.orn.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.orr.z.nxv16i1(, , ) + attributes #0 = { "target-features"="+sve" } From 529a932e3fb7db5da29134e152281e7615baf36e Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Wed, 2 Nov 2022 09:35:43 +0000 Subject: [PATCH 233/516] [AArch64] SME2 multi-vec unpack, ZIP, frint for two and four registers This patch adds the assembly/disassembly for the following instructions: SUNPK: Unpack and sign-extend multi-vector elements. UUNPK: Unpack and zero-extend multi-vector elements. ZIP (four registers): Interleave elements from four vectors. ZIP (two registers): Interleave elements from two vectors. FRINTA: Multi-vector floating-point round to integral value, to nearest with ties away from zero. FRINTM: Multi-vector floating-point round to integral value, toward minus Infinity. FRINTN: Multi-vector floating-point round to integral value, to nearest with ties to even. FRINTP: Multi-vector floating-point round to integral value, toward plus Infinity. 
The reference can be found here: https://developer.arm.com/documentation/ddi0602/2022-09 Differential Revision: https://reviews.llvm.org/D136091 --- .../lib/Target/AArch64/AArch64RegisterInfo.td | 8 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 53 ++-- llvm/lib/Target/AArch64/SMEInstrFormats.td | 225 +++++++++++---- .../test/MC/AArch64/SME2/frinta-diagnostics.s | 22 ++ llvm/test/MC/AArch64/SME2/frinta.s | 62 +++++ .../test/MC/AArch64/SME2/frintm-diagnostics.s | 22 ++ llvm/test/MC/AArch64/SME2/frintm.s | 63 +++++ .../test/MC/AArch64/SME2/frintn-diagnostics.s | 22 ++ llvm/test/MC/AArch64/SME2/frintn.s | 63 +++++ .../test/MC/AArch64/SME2/frintp-diagnostics.s | 22 ++ llvm/test/MC/AArch64/SME2/frintp.s | 62 +++++ llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s | 32 +++ llvm/test/MC/AArch64/SME2/sunpk.s | 163 +++++++++++ llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s | 32 +++ llvm/test/MC/AArch64/SME2/uunpk.s | 163 +++++++++++ llvm/test/MC/AArch64/SME2/uzp-diagnostics.s | 25 ++ llvm/test/MC/AArch64/SME2/uzp.s | 263 ++++++++++++++++++ llvm/test/MC/AArch64/SME2/zip-diagnostics.s | 25 ++ llvm/test/MC/AArch64/SME2/zip.s | 263 ++++++++++++++++++ 19 files changed, 1513 insertions(+), 77 deletions(-) create mode 100644 llvm/test/MC/AArch64/SME2/frinta-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/frinta.s create mode 100644 llvm/test/MC/AArch64/SME2/frintm-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/frintm.s create mode 100644 llvm/test/MC/AArch64/SME2/frintn-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/frintn.s create mode 100644 llvm/test/MC/AArch64/SME2/frintp-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/frintp.s create mode 100644 llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/sunpk.s create mode 100644 llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/uunpk.s create mode 100644 llvm/test/MC/AArch64/SME2/uzp-diagnostics.s create mode 
100644 llvm/test/MC/AArch64/SME2/uzp.s create mode 100644 llvm/test/MC/AArch64/SME2/zip-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/zip.s diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index b541deb837795..7b100e9026d06 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1233,6 +1233,10 @@ let EncoderMethod = "EncodeRegAsMultipleOf<2>", def ZZ_d_mul_r : RegisterOperand"> { let ParserMatchClass = ZPRVectorListMul<64, 2>; } + + def ZZ_q_mul_r : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListMul<128, 2>; + } } // end let EncoderMethod/DecoderMethod let EncoderMethod = "EncodeRegAsMultipleOf<4>", @@ -1252,6 +1256,10 @@ let EncoderMethod = "EncodeRegAsMultipleOf<4>", def ZZZZ_d_mul_r : RegisterOperand"> { let ParserMatchClass = ZPRVectorListMul<64, 4>; } + + def ZZZZ_q_mul_r : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListMul<128, 4>; + } } // end let EncoderMethod/DecoderMethod class ZPRExtendAsmOperand; defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11>; defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11>; -def FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00, 0b00>; -def FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b01, 0b00>; -def BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10, 0b00>; -def BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b11, 0b00>; - -def SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00, 0b11>; -def UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b01, 0b11>; -def SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10, 0b11>; +defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000>; +defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001>; +defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000>; +defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001>; + +defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110>; +defm 
UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111>; +defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110>; defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000>; defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001>; defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100>; @@ -366,14 +366,14 @@ defm SQCVTN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtn", 0b010>; defm SQCVTUN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtun", 0b110>; defm UQCVTN_Z4Z : sme2_int_cvt_vg4_single<"uqcvtn", 0b011>; -def FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b010>; -def FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b010>; -def FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b011>; -def FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b011>; -def SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b100>; -def SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b100>; -def UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b101>; -def UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b101>; +defm FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b00010>; +defm FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b0001000>; +defm FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b00011>; +defm FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b0001010>; +defm SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b00100>; +defm SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b0010000>; +defm UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b00101>; +defm UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b0010010>; defm SMAX_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"smax", 0b0000000>; defm SMAX_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"smax", 0b0000000>; @@ -575,6 +575,27 @@ defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">; defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">; defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">; defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">; + +defm SUNPK_VG2_2ZZ : 
sme2_unpk_vector_vg2<"sunpk", 0b0>; +defm SUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"sunpk", 0b0>; +defm UUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"uunpk", 0b1>; +defm UUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"uunpk", 0b1>; + +defm ZIP_VG2_2ZZZ : sme2_zip_vector_vg2<"zip", 0b0>; +defm UZP_VG2_2ZZZ : sme2_zip_vector_vg2<"uzp", 0b1>; +defm ZIP_VG4_4Z4Z : sme2_zip_vector_vg4<"zip", 0b0110000>; +defm UZP_VG4_4Z4Z : sme2_zip_vector_vg4<"uzp", 0b0110001>; +defm ZIP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"zip", 0b0111000>; +defm UZP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"uzp", 0b0111001>; + +defm FRINTA_2Z2Z: sme2_frint_vector_vg2_multi<"frinta", 0b11000>; +defm FRINTA_4Z4Z: sme2_frint_vector_vg4_multi<"frinta", 0b1100000>; +defm FRINTM_2Z2Z: sme2_frint_vector_vg2_multi<"frintm", 0b10100>; +defm FRINTM_4Z4Z: sme2_frint_vector_vg4_multi<"frintm", 0b1010000>; +defm FRINTN_2Z2Z: sme2_frint_vector_vg2_multi<"frintn", 0b10000>; +defm FRINTN_4Z4Z: sme2_frint_vector_vg4_multi<"frintn", 0b1000000>; +defm FRINTP_2Z2Z: sme2_frint_vector_vg2_multi<"frintp", 0b10010>; +defm FRINTP_4Z4Z: sme2_frint_vector_vg4_multi<"frintp", 0b1001000>; } let Predicates = [HasSME2, HasSMEI16I64] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index b3941fdaeaa61..4f2b2d9694ed7 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1765,82 +1765,176 @@ multiclass sme2_int_mla_long_array_vg4_multi op> { } //===----------------------------------------------------------------------===// -// SME2 multi-vec INT/ FP down convert +class sme2_frint_cvt_vg2_multisz, bits<5>op, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<4> Zn; + bits<4> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b10; + let Inst{19-16} = op{4-1}; + let Inst{15-10} = 0b111000; + let 
Inst{9-6} = Zn; + let Inst{5} = op{0}; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + +// SME2 multi-vec FP to int convert two registers +// SME2 multi-vec int to FP two registers +multiclass sme2_fp_cvt_vg2_multi op> { + def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>; +} + +// SME2 multi-vec FRINT two registers +multiclass sme2_frint_vector_vg2_multi op> { + def _S : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>; +} + +class sme2_frint_zip_cvt_vg4_multisz, bits<7>op, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<3> Zn; + bits<3> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b11; + let Inst{19-16} = op{6-3}; + let Inst{15-10} = 0b111000; + let Inst{9-7} = Zn; + let Inst{6-5} = op{2-1}; + let Inst{4-2} = Zd; + let Inst{1} = op{0}; + let Inst{0} = 0b0; +} -class sme2_cvt_vg2_single op, bits<2> is_int> +// SME2 multi-vec FP to int convert four registers +// SME2 multi-vec int to FP four registers +multiclass sme2_fp_cvt_vg4_multi op> { + def _S : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>; +} + +// SME2 multi-vec quadwords ZIP four registers +multiclass sme2_zip_vector_vg4 op> { + def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r, + mnemonic>; + def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r, + mnemonic>; + def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, + mnemonic>; + def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r, + mnemonic>; +} + +// SME2 multi-vec quadwords ZIP four registers +multiclass sme2_zip_vector_vg4_Q op> { + def NAME: sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r, + mnemonic>; +} + +// SME2 multi-vec FRINT four registers +multiclass sme2_frint_vector_vg4_multi op> { + def _S : 
sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, + mnemonic>; +} + +class sme2_cvt_vg2_single op> : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<4> Zn; bits<5> Zd; let Inst{31-23} = 0b110000010; - let Inst{22} = op{1}; + let Inst{22} = op{3}; let Inst{21-18} = 0b1000; - let Inst{17-16} = is_int; + let Inst{17-16} = op{2-1}; let Inst{15-10} = 0b111000; let Inst{9-6} = Zn; let Inst{5} = op{0}; let Inst{4-0} = Zd; } +// SME2 multi-vec FP down convert two registers +// SME2 multi-vec int down convert two registers +multiclass sme2_cvt_vg2_single op> { + def NAME : sme2_cvt_vg2_single; +} + -class sme2_cvt_vg4_single op, ZPRRegOp zpr_ty, RegisterOperand vector_ty, - string mnemonic> - : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn), +class sme2_unpk_vector_vg2sz, bit u, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zn; + bits<4> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-10} = 0b100101111000; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; + let Inst{0} = u; +} + +// SME2 multi-vec unpack two registers +multiclass sme2_unpk_vector_vg2 { + def _H : sme2_unpk_vector_vg2<0b01, u, ZZ_h_mul_r, ZPR8, mnemonic>; + def _S : sme2_unpk_vector_vg2<0b10, u, ZZ_s_mul_r, ZPR16, mnemonic>; + def _D : sme2_unpk_vector_vg2<0b11, u, ZZ_d_mul_r, ZPR32, mnemonic>; +} + + +class sme2_cvt_vg4_single op, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<3> Zn; bits<5> Zd; let Inst{31-24} = 0b11000001; let Inst{23} = sz; let Inst{22} = op{2}; - let Inst{21-16} = 0b110011; - let Inst{15-10} = 0b111000; + let Inst{21-10} = 0b110011111000; let Inst{9-7} = Zn; let Inst{6-5} = op{1-0}; let Inst{4-0} = Zd; } - +// SME2 multi-vec int down convert four registers 
multiclass sme2_int_cvt_vg4_single op> { -def _StoB : sme2_cvt_vg4_single<0b0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>; -def _DtoH : sme2_cvt_vg4_single<0b1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>; +def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>; +def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>; } -class sme2_fp_cvt_vg2_multi op> - : I<(outs ZZ_s_mul_r:$Zd), (ins ZZ_s_mul_r:$Zn), - mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { +class sme2_unpk_vector_vg4sz, bit u, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<4> Zn; - bits<4> Zd; - let Inst{31-18} = 0b11000001001000; - let Inst{17-16} = op{2-1}; - let Inst{15-10} = 0b111000; + bits<3> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-10} = 0b110101111000; let Inst{9-6} = Zn; - let Inst{5} = op{0}; - let Inst{4-1} = Zd; - let Inst{0} = 0b0; + let Inst{5} = 0b0; + let Inst{4-2} = Zd; + let Inst{1} = 0b0; + let Inst{0} = u; } - -class sme2_fp_cvt_vg4_multi op> - : I<(outs ZZZZ_s_mul_r:$Zd), (ins ZZZZ_s_mul_r:$Zn), - mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { - bits<3> Zn; - bits<3> Zd; - let Inst{31-18} = 0b11000001001100; - let Inst{17-16} = op{2-1}; - let Inst{15-10} = 0b111000; - let Inst{9-7} = Zn; - let Inst{6} = 0b0; - let Inst{5} = op{0}; - let Inst{4-2} = Zd; - let Inst{1-0} = 0b00; +// SME2 multi-vec UNPK four registers +multiclass sme2_unpk_vector_vg4 { + def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>; + def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>; + def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>; } //===----------------------------------------------------------------------===// // SME2 multi-vec CLAMP registers -class sme2_clamp_vector_vg24_multi sz, bits<2> op1, bit u, - RegisterOperand multi_vector_ty, - ZPRRegOp vector_ty, string 
mnemonic> +class sme2_zip_clamp_vector_vg24_multi sz, bits<3> op1, bit u, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, string mnemonic> : I<(outs multi_vector_ty:$Zd), (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm), mnemonic, "\t$Zd, $Zn, $Zm", @@ -1851,40 +1945,48 @@ class sme2_clamp_vector_vg24_multi sz, bits<2> op1, bit u, let Inst{23-22} = sz; let Inst{21} = 0b1; let Inst{20-16} = Zm; - let Inst{15-12} = 0b1100; - let Inst{11-10} = op1; + let Inst{15-13} = 0b110; + let Inst{12-10} = op1; let Inst{9-5} = Zn; let Inst{0} = u; let Constraints = "$Zd = $_Zd"; } -class sme2_clamp_vector_vg2_multi sz, bits<2> op1, bit u, - RegisterOperand multi_vector_ty, - ZPRRegOp vector_ty, string mnemonic> - : sme2_clamp_vector_vg24_multi sz, bits<3> op1, bit u, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, string mnemonic> + : sme2_zip_clamp_vector_vg24_multi{ bits<4> Zd; let Inst{4-1} = Zd; } multiclass sme2_fp_clamp_vector_vg2_multi{ - def _H : sme2_clamp_vector_vg2_multi<0b01, 0b00, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg2_multi<0b10, 0b00, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>; - def _D : sme2_clamp_vector_vg2_multi<0b11, 0b00, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>; + def _H : sme2_zip_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_zip_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_zip_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>; } multiclass sme2_int_clamp_vector_vg2_multi{ - def _B : sme2_clamp_vector_vg2_multi<0b00, 0b01, u, ZZ_b_mul_r, ZPR8, mnemonic>; - def _H : sme2_clamp_vector_vg2_multi<0b01, 0b01, u, ZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg2_multi<0b10, 0b01, u, ZZ_s_mul_r, ZPR32, mnemonic>; - def _D : sme2_clamp_vector_vg2_multi<0b11, 0b01, u, ZZ_d_mul_r, ZPR64, mnemonic>; + def _B : sme2_zip_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8, mnemonic>; + def _H : 
sme2_zip_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_zip_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_zip_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64, mnemonic>; } -class sme2_clamp_vector_vg4_multi sz, bits<2> op1, bit u, +multiclass sme2_zip_vector_vg2 { + def _B : sme2_zip_clamp_vector_vg2_multi<0b00, 0b100, op, ZZ_b_mul_r, ZPR8, mnemonic>; + def _H : sme2_zip_clamp_vector_vg2_multi<0b01, 0b100, op, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_zip_clamp_vector_vg2_multi<0b10, 0b100, op, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_zip_clamp_vector_vg2_multi<0b11, 0b100, op, ZZ_d_mul_r, ZPR64, mnemonic>; + def _Q : sme2_zip_clamp_vector_vg2_multi<0b00, 0b101, op, ZZ_q_mul_r, ZPR128, mnemonic>; +} + +class sme2_clamp_vector_vg4_multi sz, bits<3> op1, bit u, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> - : sme2_clamp_vector_vg24_multi{ bits<3> Zd; let Inst{4-2} = Zd; @@ -1892,16 +1994,16 @@ class sme2_clamp_vector_vg4_multi sz, bits<2> op1, bit u, } multiclass sme2_fp_clamp_vector_vg4_multi{ - def _H : sme2_clamp_vector_vg4_multi<0b01, 0b10, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg4_multi<0b10, 0b10, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>; - def _D : sme2_clamp_vector_vg4_multi<0b11, 0b10, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>; + def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>; } multiclass sme2_int_clamp_vector_vg4_multi{ - def _B : sme2_clamp_vector_vg4_multi<0b00, 0b11, u, ZZZZ_b_mul_r, ZPR8, mnemonic>; - def _H : sme2_clamp_vector_vg4_multi<0b01, 0b11, u, ZZZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg4_multi<0b10, 0b11, u, ZZZZ_s_mul_r, ZPR32, mnemonic>; - def _D : 
sme2_clamp_vector_vg4_multi<0b11, 0b11, u, ZZZZ_d_mul_r, ZPR64, mnemonic>; + def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>; + def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>; } //===----------------------------------------------------------------------===// @@ -2567,3 +2669,4 @@ multiclass sme2_luti4_vector_vg4_index { def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; } + diff --git a/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s b/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s new file mode 100644 index 0000000000000..06f0447414e66 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frinta {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frinta {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frinta {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frinta {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frinta {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frinta {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git 
a/llvm/test/MC/AArch64/SME2/frinta.s b/llvm/test/MC/AArch64/SME2/frinta.s new file mode 100644 index 0000000000000..3970a0e0e2bff --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frinta.s @@ -0,0 +1,62 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frinta {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101100-11100000-00000000 +// CHECK-INST: frinta { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace000 + +frinta {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101100-11100001-01010100 +// CHECK-INST: frinta { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace154 + +frinta {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101100-11100001-10010110 +// CHECK-INST: frinta { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace196 + +frinta {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101100-11100011-11011110 +// CHECK-INST: frinta { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xac,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1ace3de + + +frinta {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111100-11100000-00000000 +// CHECK-INST: frinta { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce000 + +frinta {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111100-11100001-00010100 +// CHECK-INST: frinta { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce114 + +frinta {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111100-11100001-10010100 +// CHECK-INST: frinta { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce194 + +frinta {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-11100011-10011100 +// CHECK-INST: frinta { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce39c diff --git a/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s new file mode 100644 index 0000000000000..7deb91aef15a9 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintm {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintm {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintm {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintm {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintm {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintm {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintm.s b/llvm/test/MC/AArch64/SME2/frintm.s new file mode 100644 index 0000000000000..ff294dfef3faa --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintm.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintm {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101010-11100000-00000000 +// CHECK-INST: frintm { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae000 + +frintm {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101010-11100001-01010100 +// CHECK-INST: frintm { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae154 + +frintm {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101010-11100001-10010110 +// CHECK-INST: frintm { z22.s, z23.s }, { z12.s, 
z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae196 + +frintm {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101010-11100011-11011110 +// CHECK-INST: frintm { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae3de + + +frintm {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111010-11100000-00000000 +// CHECK-INST: frintm { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae000 + +frintm {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111010-11100001-00010100 +// CHECK-INST: frintm { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae114 + +frintm {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111010-11100001-10010100 +// CHECK-INST: frintm { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae194 + +frintm {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111010-11100011-10011100 +// CHECK-INST: frintm { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae39c + diff --git a/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s new file mode 100644 index 0000000000000..2fe00351df8f0 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintn {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintn {z0.s-z1.s}, {z0.s-z2.s} 
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintn {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintn {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintn {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintn {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintn.s b/llvm/test/MC/AArch64/SME2/frintn.s new file mode 100644 index 0000000000000..2d85b4ca878c6 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintn.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintn {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101000-11100000-00000000 +// CHECK-INST: frintn { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e000 + +frintn {z20.s - z21.s}, {z10.s - z11.s} // 
11000001-10101000-11100001-01010100 +// CHECK-INST: frintn { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e154 + +frintn {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101000-11100001-10010110 +// CHECK-INST: frintn { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e196 + +frintn {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101000-11100011-11011110 +// CHECK-INST: frintn { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e3de + + +frintn {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111000-11100000-00000000 +// CHECK-INST: frintn { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e000 + +frintn {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111000-11100001-00010100 +// CHECK-INST: frintn { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e114 + +frintn {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111000-11100001-10010100 +// CHECK-INST: frintn { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e194 + +frintn {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111000-11100011-10011100 +// CHECK-INST: frintn { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e39c + diff --git a/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s new file mode 100644 index 0000000000000..7013df058690c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintp {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintp {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintp {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintp {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintp {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintp {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintp.s b/llvm/test/MC/AArch64/SME2/frintp.s new file mode 100644 index 0000000000000..600f26063ca5e --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintp.s @@ -0,0 +1,62 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintp {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101001-11100000-00000000 +// CHECK-INST: frintp { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e000 + +frintp {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101001-11100001-01010100 +// CHECK-INST: frintp { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e154 + +frintp {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101001-11100001-10010110 +// CHECK-INST: frintp { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e196 + +frintp {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101001-11100011-11011110 +// CHECK-INST: frintp { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e3de + + +frintp {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111001-11100000-00000000 +// CHECK-INST: frintp { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e000 + +frintp {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111001-11100001-00010100 +// CHECK-INST: frintp { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e114 + +frintp {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111001-11100001-10010100 +// CHECK-INST: frintp { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e194 + +frintp {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111001-11100011-10011100 +// CHECK-INST: frintp { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb9,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e39c diff --git a/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s b/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s new file mode 100644 index 0000000000000..4e7ea5c66114c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sunpk {z0.h-z2.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.h-z2.h}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z1.s-z2.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: sunpk {z1.s-z2.s}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sunpk.s b/llvm/test/MC/AArch64/SME2/sunpk.s new file mode 100644 index 0000000000000..86543aa4b4f22 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sunpk.s @@ -0,0 +1,163 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sunpk {z0.h - z1.h}, z0.b // 11000001-01100101-11100000-00000000 +// CHECK-INST: sunpk { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x00,0xe0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e000 + +sunpk {z20.h - z21.h}, z10.b // 11000001-01100101-11100001-01010100 +// CHECK-INST: sunpk { z20.h, z21.h }, z10.b +// CHECK-ENCODING: [0x54,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e154 + +sunpk {z22.h - z23.h}, z13.b // 11000001-01100101-11100001-10110110 +// CHECK-INST: sunpk { z22.h, z23.h }, z13.b +// CHECK-ENCODING: [0xb6,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e1b6 + +sunpk {z30.h - z31.h}, z31.b // 11000001-01100101-11100011-11111110 +// CHECK-INST: sunpk { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xfe,0xe3,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e3fe + + +sunpk {z0.s - z1.s}, z0.h // 11000001-10100101-11100000-00000000 +// CHECK-INST: sunpk { z0.s, z1.s }, z0.h +// CHECK-ENCODING: [0x00,0xe0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e000 + +sunpk {z20.s - z21.s}, z10.h // 11000001-10100101-11100001-01010100 +// CHECK-INST: sunpk { z20.s, z21.s }, z10.h +// 
CHECK-ENCODING: [0x54,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e154 + +sunpk {z22.s - z23.s}, z13.h // 11000001-10100101-11100001-10110110 +// CHECK-INST: sunpk { z22.s, z23.s }, z13.h +// CHECK-ENCODING: [0xb6,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e1b6 + +sunpk {z30.s - z31.s}, z31.h // 11000001-10100101-11100011-11111110 +// CHECK-INST: sunpk { z30.s, z31.s }, z31.h +// CHECK-ENCODING: [0xfe,0xe3,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e3fe + + +sunpk {z0.d - z1.d}, z0.s // 11000001-11100101-11100000-00000000 +// CHECK-INST: sunpk { z0.d, z1.d }, z0.s +// CHECK-ENCODING: [0x00,0xe0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e000 + +sunpk {z20.d - z21.d}, z10.s // 11000001-11100101-11100001-01010100 +// CHECK-INST: sunpk { z20.d, z21.d }, z10.s +// CHECK-ENCODING: [0x54,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e154 + +sunpk {z22.d - z23.d}, z13.s // 11000001-11100101-11100001-10110110 +// CHECK-INST: sunpk { z22.d, z23.d }, z13.s +// CHECK-ENCODING: [0xb6,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e1b6 + +sunpk {z30.d - z31.d}, z31.s // 11000001-11100101-11100011-11111110 +// CHECK-INST: sunpk { z30.d, z31.d }, z31.s +// CHECK-ENCODING: [0xfe,0xe3,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e3fe + + +sunpk {z0.h - z3.h}, {z0.b - z1.b} // 11000001-01110101-11100000-00000000 +// CHECK-INST: sunpk { z0.h - z3.h }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0xe0,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e000 + +sunpk {z20.h - z23.h}, {z10.b - z11.b} // 11000001-01110101-11100001-01010100 +// CHECK-INST: sunpk { z20.h - z23.h }, { z10.b, z11.b } +// CHECK-ENCODING: [0x54,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e154 + 
+sunpk {z20.h - z23.h}, {z12.b - z13.b} // 11000001-01110101-11100001-10010100 +// CHECK-INST: sunpk { z20.h - z23.h }, { z12.b, z13.b } +// CHECK-ENCODING: [0x94,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e194 + +sunpk {z28.h - z31.h}, {z30.b - z31.b} // 11000001-01110101-11100011-11011100 +// CHECK-INST: sunpk { z28.h - z31.h }, { z30.b, z31.b } +// CHECK-ENCODING: [0xdc,0xe3,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e3dc + + +sunpk {z0.s - z3.s}, {z0.h - z1.h} // 11000001-10110101-11100000-00000000 +// CHECK-INST: sunpk { z0.s - z3.s }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xe0,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e000 + +sunpk {z20.s - z23.s}, {z10.h - z11.h} // 11000001-10110101-11100001-01010100 +// CHECK-INST: sunpk { z20.s - z23.s }, { z10.h, z11.h } +// CHECK-ENCODING: [0x54,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e154 + +sunpk {z20.s - z23.s}, {z12.h - z13.h} // 11000001-10110101-11100001-10010100 +// CHECK-INST: sunpk { z20.s - z23.s }, { z12.h, z13.h } +// CHECK-ENCODING: [0x94,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e194 + +sunpk {z28.s - z31.s}, {z30.h - z31.h} // 11000001-10110101-11100011-11011100 +// CHECK-INST: sunpk { z28.s - z31.s }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdc,0xe3,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e3dc + + +sunpk {z0.d - z3.d}, {z0.s - z1.s} // 11000001-11110101-11100000-00000000 +// CHECK-INST: sunpk { z0.d - z3.d }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e000 + +sunpk {z20.d - z23.d}, {z10.s - z11.s} // 11000001-11110101-11100001-01010100 +// CHECK-INST: sunpk { z20.d - z23.d }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1f5e154 + +sunpk {z20.d - z23.d}, {z12.s - z13.s} // 11000001-11110101-11100001-10010100 +// CHECK-INST: sunpk { z20.d - z23.d }, { z12.s, z13.s } +// CHECK-ENCODING: [0x94,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e194 + +sunpk {z28.d - z31.d}, {z30.s - z31.s} // 11000001-11110101-11100011-11011100 +// CHECK-INST: sunpk { z28.d - z31.d }, { z30.s, z31.s } +// CHECK-ENCODING: [0xdc,0xe3,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e3dc + diff --git a/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s b/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s new file mode 100644 index 0000000000000..05fdf348e5a96 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uunpk {z0.h-z2.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.h-z2.h}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z1.s-z2.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: uunpk {z1.s-z2.s}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +uunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/uunpk.s b/llvm/test/MC/AArch64/SME2/uunpk.s new file mode 100644 index 0000000000000..414dcec8928f2 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uunpk.s @@ -0,0 +1,163 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uunpk {z0.h - z1.h}, z0.b // 11000001-01100101-11100000-00000001 +// CHECK-INST: uunpk { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x01,0xe0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e001 + +uunpk {z20.h - z21.h}, z10.b // 11000001-01100101-11100001-01010101 +// CHECK-INST: uunpk { z20.h, z21.h }, z10.b +// CHECK-ENCODING: [0x55,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e155 + +uunpk {z22.h - z23.h}, z13.b // 11000001-01100101-11100001-10110111 +// CHECK-INST: uunpk { z22.h, z23.h }, z13.b +// CHECK-ENCODING: [0xb7,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e1b7 + +uunpk {z30.h - z31.h}, z31.b // 11000001-01100101-11100011-11111111 +// CHECK-INST: uunpk { z30.h, z31.h }, z31.b +// 
CHECK-ENCODING: [0xff,0xe3,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e3ff + + +uunpk {z0.s - z1.s}, z0.h // 11000001-10100101-11100000-00000001 +// CHECK-INST: uunpk { z0.s, z1.s }, z0.h +// CHECK-ENCODING: [0x01,0xe0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e001 + +uunpk {z20.s - z21.s}, z10.h // 11000001-10100101-11100001-01010101 +// CHECK-INST: uunpk { z20.s, z21.s }, z10.h +// CHECK-ENCODING: [0x55,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e155 + +uunpk {z22.s - z23.s}, z13.h // 11000001-10100101-11100001-10110111 +// CHECK-INST: uunpk { z22.s, z23.s }, z13.h +// CHECK-ENCODING: [0xb7,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e1b7 + +uunpk {z30.s - z31.s}, z31.h // 11000001-10100101-11100011-11111111 +// CHECK-INST: uunpk { z30.s, z31.s }, z31.h +// CHECK-ENCODING: [0xff,0xe3,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e3ff + + +uunpk {z0.d - z1.d}, z0.s // 11000001-11100101-11100000-00000001 +// CHECK-INST: uunpk { z0.d, z1.d }, z0.s +// CHECK-ENCODING: [0x01,0xe0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e001 + +uunpk {z20.d - z21.d}, z10.s // 11000001-11100101-11100001-01010101 +// CHECK-INST: uunpk { z20.d, z21.d }, z10.s +// CHECK-ENCODING: [0x55,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e155 + +uunpk {z22.d - z23.d}, z13.s // 11000001-11100101-11100001-10110111 +// CHECK-INST: uunpk { z22.d, z23.d }, z13.s +// CHECK-ENCODING: [0xb7,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e1b7 + +uunpk {z30.d - z31.d}, z31.s // 11000001-11100101-11100011-11111111 +// CHECK-INST: uunpk { z30.d, z31.d }, z31.s +// CHECK-ENCODING: [0xff,0xe3,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e3ff + + +uunpk {z0.h - z3.h}, {z0.b - z1.b} // 
11000001-01110101-11100000-00000001 +// CHECK-INST: uunpk { z0.h - z3.h }, { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0xe0,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e001 + +uunpk {z20.h - z23.h}, {z10.b - z11.b} // 11000001-01110101-11100001-01010101 +// CHECK-INST: uunpk { z20.h - z23.h }, { z10.b, z11.b } +// CHECK-ENCODING: [0x55,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e155 + +uunpk {z20.h - z23.h}, {z12.b - z13.b} // 11000001-01110101-11100001-10010101 +// CHECK-INST: uunpk { z20.h - z23.h }, { z12.b, z13.b } +// CHECK-ENCODING: [0x95,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e195 + +uunpk {z28.h - z31.h}, {z30.b - z31.b} // 11000001-01110101-11100011-11011101 +// CHECK-INST: uunpk { z28.h - z31.h }, { z30.b, z31.b } +// CHECK-ENCODING: [0xdd,0xe3,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e3dd + + +uunpk {z0.s - z3.s}, {z0.h - z1.h} // 11000001-10110101-11100000-00000001 +// CHECK-INST: uunpk { z0.s - z3.s }, { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0xe0,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e001 + +uunpk {z20.s - z23.s}, {z10.h - z11.h} // 11000001-10110101-11100001-01010101 +// CHECK-INST: uunpk { z20.s - z23.s }, { z10.h, z11.h } +// CHECK-ENCODING: [0x55,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e155 + +uunpk {z20.s - z23.s}, {z12.h - z13.h} // 11000001-10110101-11100001-10010101 +// CHECK-INST: uunpk { z20.s - z23.s }, { z12.h, z13.h } +// CHECK-ENCODING: [0x95,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e195 + +uunpk {z28.s - z31.s}, {z30.h - z31.h} // 11000001-10110101-11100011-11011101 +// CHECK-INST: uunpk { z28.s - z31.s }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdd,0xe3,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e3dd + + +uunpk {z0.d - z3.d}, 
{z0.s - z1.s} // 11000001-11110101-11100000-00000001 +// CHECK-INST: uunpk { z0.d - z3.d }, { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0xe0,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e001 + +uunpk {z20.d - z23.d}, {z10.s - z11.s} // 11000001-11110101-11100001-01010101 +// CHECK-INST: uunpk { z20.d - z23.d }, { z10.s, z11.s } +// CHECK-ENCODING: [0x55,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e155 + +uunpk {z20.d - z23.d}, {z12.s - z13.s} // 11000001-11110101-11100001-10010101 +// CHECK-INST: uunpk { z20.d - z23.d }, { z12.s, z13.s } +// CHECK-ENCODING: [0x95,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e195 + +uunpk {z28.d - z31.d}, {z30.s - z31.s} // 11000001-11110101-11100011-11011101 +// CHECK-INST: uunpk { z28.d - z31.d }, { z30.s, z31.s } +// CHECK-ENCODING: [0xdd,0xe3,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e3dd + diff --git a/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s b/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s new file mode 100644 index 0000000000000..aa853fe23194d --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s @@ -0,0 +1,25 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uzp {z0.q-z2.q}, z0.q, z0.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uzp {z0.q-z2.q}, z0.q, z0.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z21.h-z22.h}, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: uzp {z21.h-z22.h}, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z0.s-z4.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid 
number of vectors +// CHECK-NEXT: uzp {z0.s-z4.s}, {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z20.b-z23.b}, {z9.b-z12.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: uzp {z20.b-z23.b}, {z9.b-z12.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/uzp.s b/llvm/test/MC/AArch64/SME2/uzp.s new file mode 100644 index 0000000000000..4eb673e8dc700 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uzp.s @@ -0,0 +1,263 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uzp {z0.q - z1.q}, z0.q, z0.q // 11000001-00100000-11010100-00000001 +// CHECK-INST: uzp { z0.q, z1.q }, z0.q, z0.q +// CHECK-ENCODING: [0x01,0xd4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d401 + +uzp {z20.q - z21.q}, z10.q, z21.q // 11000001-00110101-11010101-01010101 +// CHECK-INST: uzp { z20.q, z21.q }, z10.q, z21.q +// CHECK-ENCODING: [0x55,0xd5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d555 + +uzp {z22.q - z23.q}, z13.q, z8.q // 11000001-00101000-11010101-10110111 +// 
CHECK-INST: uzp { z22.q, z23.q }, z13.q, z8.q +// CHECK-ENCODING: [0xb7,0xd5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d5b7 + +uzp {z30.q - z31.q}, z31.q, z31.q // 11000001-00111111-11010111-11111111 +// CHECK-INST: uzp { z30.q, z31.q }, z31.q, z31.q +// CHECK-ENCODING: [0xff,0xd7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd7ff + + +uzp {z0.h - z1.h}, z0.h, z0.h // 11000001-01100000-11010000-00000001 +// CHECK-INST: uzp { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x01,0xd0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d001 + +uzp {z20.h - z21.h}, z10.h, z21.h // 11000001-01110101-11010001-01010101 +// CHECK-INST: uzp { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x55,0xd1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d155 + +uzp {z22.h - z23.h}, z13.h, z8.h // 11000001-01101000-11010001-10110111 +// CHECK-INST: uzp { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0xd1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d1b7 + +uzp {z30.h - z31.h}, z31.h, z31.h // 11000001-01111111-11010011-11111111 +// CHECK-INST: uzp { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xff,0xd3,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fd3ff + + +uzp {z0.s - z1.s}, z0.s, z0.s // 11000001-10100000-11010000-00000001 +// CHECK-INST: uzp { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x01,0xd0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d001 + +uzp {z20.s - z21.s}, z10.s, z21.s // 11000001-10110101-11010001-01010101 +// CHECK-INST: uzp { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x55,0xd1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5d155 + +uzp {z22.s - z23.s}, z13.s, z8.s // 11000001-10101000-11010001-10110111 +// CHECK-INST: uzp { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb7,0xd1,0xa8,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8d1b7 + +uzp {z30.s - z31.s}, z31.s, z31.s // 11000001-10111111-11010011-11111111 +// CHECK-INST: uzp { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xff,0xd3,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfd3ff + + +uzp {z0.d - z1.d}, z0.d, z0.d // 11000001-11100000-11010000-00000001 +// CHECK-INST: uzp { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x01,0xd0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d001 + +uzp {z20.d - z21.d}, z10.d, z21.d // 11000001-11110101-11010001-01010101 +// CHECK-INST: uzp { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x55,0xd1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d155 + +uzp {z22.d - z23.d}, z13.d, z8.d // 11000001-11101000-11010001-10110111 +// CHECK-INST: uzp { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb7,0xd1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d1b7 + +uzp {z30.d - z31.d}, z31.d, z31.d // 11000001-11111111-11010011-11111111 +// CHECK-INST: uzp { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xff,0xd3,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffd3ff + + +uzp {z0.b - z1.b}, z0.b, z0.b // 11000001-00100000-11010000-00000001 +// CHECK-INST: uzp { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x01,0xd0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d001 + +uzp {z20.b - z21.b}, z10.b, z21.b // 11000001-00110101-11010001-01010101 +// CHECK-INST: uzp { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x55,0xd1,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d155 + +uzp {z22.b - z23.b}, z13.b, z8.b // 11000001-00101000-11010001-10110111 +// CHECK-INST: uzp { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0xd1,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d1b7 + +uzp {z30.b - 
z31.b}, z31.b, z31.b // 11000001-00111111-11010011-11111111 +// CHECK-INST: uzp { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xff,0xd3,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd3ff + + +uzp {z0.q - z3.q}, {z0.q - z3.q} // 11000001-00110111-11100000-00000010 +// CHECK-INST: uzp { z0.q - z3.q }, { z0.q - z3.q } +// CHECK-ENCODING: [0x02,0xe0,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e002 + +uzp {z20.q - z23.q}, {z8.q - z11.q} // 11000001-00110111-11100001-00010110 +// CHECK-INST: uzp { z20.q - z23.q }, { z8.q - z11.q } +// CHECK-ENCODING: [0x16,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e116 + +uzp {z20.q - z23.q}, {z12.q - z15.q} // 11000001-00110111-11100001-10010110 +// CHECK-INST: uzp { z20.q - z23.q }, { z12.q - z15.q } +// CHECK-ENCODING: [0x96,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e196 + +uzp {z28.q - z31.q}, {z28.q - z31.q} // 11000001-00110111-11100011-10011110 +// CHECK-INST: uzp { z28.q - z31.q }, { z28.q - z31.q } +// CHECK-ENCODING: [0x9e,0xe3,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e39e + + +uzp {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01110110-11100000-00000010 +// CHECK-INST: uzp { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x02,0xe0,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e002 + +uzp {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01110110-11100001-00010110 +// CHECK-INST: uzp { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x16,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e116 + +uzp {z20.h - z23.h}, {z12.h - z15.h} // 11000001-01110110-11100001-10010110 +// CHECK-INST: uzp { z20.h - z23.h }, { z12.h - z15.h } +// CHECK-ENCODING: [0x96,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e196 + +uzp {z28.h - z31.h}, {z28.h - z31.h} 
// 11000001-01110110-11100011-10011110 +// CHECK-INST: uzp { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9e,0xe3,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e39e + + +uzp {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10110110-11100000-00000010 +// CHECK-INST: uzp { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x02,0xe0,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e002 + +uzp {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10110110-11100001-00010110 +// CHECK-INST: uzp { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x16,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e116 + +uzp {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10110110-11100001-10010110 +// CHECK-INST: uzp { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x96,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e196 + +uzp {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10110110-11100011-10011110 +// CHECK-INST: uzp { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9e,0xe3,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e39e + + +uzp {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11110110-11100000-00000010 +// CHECK-INST: uzp { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x02,0xe0,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e002 + +uzp {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11110110-11100001-00010110 +// CHECK-INST: uzp { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x16,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e116 + +uzp {z20.d - z23.d}, {z12.d - z15.d} // 11000001-11110110-11100001-10010110 +// CHECK-INST: uzp { z20.d - z23.d }, { z12.d - z15.d } +// CHECK-ENCODING: [0x96,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e196 + +uzp {z28.d - z31.d}, {z28.d - z31.d} // 
11000001-11110110-11100011-10011110 +// CHECK-INST: uzp { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9e,0xe3,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e39e + + +uzp {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00110110-11100000-00000010 +// CHECK-INST: uzp { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x02,0xe0,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e002 + +uzp {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00110110-11100001-00010110 +// CHECK-INST: uzp { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x16,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e116 + +uzp {z20.b - z23.b}, {z12.b - z15.b} // 11000001-00110110-11100001-10010110 +// CHECK-INST: uzp { z20.b - z23.b }, { z12.b - z15.b } +// CHECK-ENCODING: [0x96,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e196 + +uzp {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00110110-11100011-10011110 +// CHECK-INST: uzp { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x9e,0xe3,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e39e + diff --git a/llvm/test/MC/AArch64/SME2/zip-diagnostics.s b/llvm/test/MC/AArch64/SME2/zip-diagnostics.s new file mode 100644 index 0000000000000..6c80096ab4865 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zip-diagnostics.s @@ -0,0 +1,25 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +zip {z0.q-z2.q}, z0.q, z0.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: zip {z0.q-z2.q}, z0.q, z0.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z21.h-z22.h}, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a 
multiple of 2 and with matching element types +// CHECK-NEXT: zip {z21.h-z22.h}, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z0.s-z4.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: zip {z0.s-z4.s}, {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z20.b-z23.b}, {z9.b-z12.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: zip {z20.b-z23.b}, {z9.b-z12.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/zip.s b/llvm/test/MC/AArch64/SME2/zip.s new file mode 100644 index 0000000000000..cfb048710ca43 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zip.s @@ -0,0 +1,263 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +zip {z0.q - z1.q}, z0.q, z0.q // 11000001-00100000-11010100-00000000 +// CHECK-INST: zip { z0.q, z1.q }, z0.q, z0.q +// CHECK-ENCODING: [0x00,0xd4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d400 + +zip {z20.q - z21.q}, z10.q, z21.q // 11000001-00110101-11010101-01010100 +// CHECK-INST: zip { z20.q, z21.q }, 
z10.q, z21.q +// CHECK-ENCODING: [0x54,0xd5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d554 + +zip {z22.q - z23.q}, z13.q, z8.q // 11000001-00101000-11010101-10110110 +// CHECK-INST: zip { z22.q, z23.q }, z13.q, z8.q +// CHECK-ENCODING: [0xb6,0xd5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d5b6 + +zip {z30.q - z31.q}, z31.q, z31.q // 11000001-00111111-11010111-11111110 +// CHECK-INST: zip { z30.q, z31.q }, z31.q, z31.q +// CHECK-ENCODING: [0xfe,0xd7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd7fe + + +zip {z0.h - z1.h}, z0.h, z0.h // 11000001-01100000-11010000-00000000 +// CHECK-INST: zip { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xd0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d000 + +zip {z20.h - z21.h}, z10.h, z21.h // 11000001-01110101-11010001-01010100 +// CHECK-INST: zip { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x54,0xd1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d154 + +zip {z22.h - z23.h}, z13.h, z8.h // 11000001-01101000-11010001-10110110 +// CHECK-INST: zip { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb6,0xd1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d1b6 + +zip {z30.h - z31.h}, z31.h, z31.h // 11000001-01111111-11010011-11111110 +// CHECK-INST: zip { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfe,0xd3,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fd3fe + + +zip {z0.s - z1.s}, z0.s, z0.s // 11000001-10100000-11010000-00000000 +// CHECK-INST: zip { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x00,0xd0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d000 + +zip {z20.s - z21.s}, z10.s, z21.s // 11000001-10110101-11010001-01010100 +// CHECK-INST: zip { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x54,0xd1,0xb5,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1b5d154 + +zip {z22.s - z23.s}, z13.s, z8.s // 11000001-10101000-11010001-10110110 +// CHECK-INST: zip { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb6,0xd1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8d1b6 + +zip {z30.s - z31.s}, z31.s, z31.s // 11000001-10111111-11010011-11111110 +// CHECK-INST: zip { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xfe,0xd3,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfd3fe + + +zip {z0.d - z1.d}, z0.d, z0.d // 11000001-11100000-11010000-00000000 +// CHECK-INST: zip { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xd0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d000 + +zip {z20.d - z21.d}, z10.d, z21.d // 11000001-11110101-11010001-01010100 +// CHECK-INST: zip { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x54,0xd1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d154 + +zip {z22.d - z23.d}, z13.d, z8.d // 11000001-11101000-11010001-10110110 +// CHECK-INST: zip { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb6,0xd1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d1b6 + +zip {z30.d - z31.d}, z31.d, z31.d // 11000001-11111111-11010011-11111110 +// CHECK-INST: zip { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfe,0xd3,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffd3fe + + +zip {z0.b - z1.b}, z0.b, z0.b // 11000001-00100000-11010000-00000000 +// CHECK-INST: zip { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x00,0xd0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d000 + +zip {z20.b, z21.b}, z10.b, z21.b // 11000001-00110101-11010001-01010100 +// CHECK-INST: zip { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x54,0xd1,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d154 + +zip {z22.b - z23.b}, z13.b, z8.b // 
11000001-00101000-11010001-10110110 +// CHECK-INST: zip { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb6,0xd1,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d1b6 + +zip {z30.b - z31.b}, z31.b, z31.b // 11000001-00111111-11010011-11111110 +// CHECK-INST: zip { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xfe,0xd3,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd3fe + + +zip {z0.q - z3.q}, {z0.q - z3.q} // 11000001-00110111-11100000-00000000 +// CHECK-INST: zip { z0.q - z3.q }, { z0.q - z3.q } +// CHECK-ENCODING: [0x00,0xe0,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e000 + +zip {z20.q - z23.q}, {z8.q - z11.q} // 11000001-00110111-11100001-00010100 +// CHECK-INST: zip { z20.q - z23.q }, { z8.q - z11.q } +// CHECK-ENCODING: [0x14,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e114 + +zip {z20.q - z23.q}, {z12.q - z15.q} // 11000001-00110111-11100001-10010100 +// CHECK-INST: zip { z20.q - z23.q }, { z12.q - z15.q } +// CHECK-ENCODING: [0x94,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e194 + +zip {z28.q - z31.q}, {z28.q - z31.q} // 11000001-00110111-11100011-10011100 +// CHECK-INST: zip { z28.q - z31.q }, { z28.q - z31.q } +// CHECK-ENCODING: [0x9c,0xe3,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e39c + + +zip {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01110110-11100000-00000000 +// CHECK-INST: zip { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0xe0,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e000 + +zip {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01110110-11100001-00010100 +// CHECK-INST: zip { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x14,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e114 + +zip {z20.h - z23.h}, {z12.h - z15.h} // 
11000001-01110110-11100001-10010100 +// CHECK-INST: zip { z20.h - z23.h }, { z12.h - z15.h } +// CHECK-ENCODING: [0x94,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e194 + +zip {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01110110-11100011-10011100 +// CHECK-INST: zip { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0xe3,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e39c + + +zip {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10110110-11100000-00000000 +// CHECK-INST: zip { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e000 + +zip {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10110110-11100001-00010100 +// CHECK-INST: zip { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e114 + +zip {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10110110-11100001-10010100 +// CHECK-INST: zip { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e194 + +zip {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10110110-11100011-10011100 +// CHECK-INST: zip { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e39c + + +zip {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11110110-11100000-00000000 +// CHECK-INST: zip { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0xe0,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e000 + +zip {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11110110-11100001-00010100 +// CHECK-INST: zip { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x14,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e114 + +zip {z20.d - z23.d}, {z12.d - z15.d} // 
11000001-11110110-11100001-10010100 +// CHECK-INST: zip { z20.d - z23.d }, { z12.d - z15.d } +// CHECK-ENCODING: [0x94,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e194 + +zip {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11110110-11100011-10011100 +// CHECK-INST: zip { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9c,0xe3,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e39c + + +zip {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00110110-11100000-00000000 +// CHECK-INST: zip { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0xe0,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e000 + +zip {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00110110-11100001-00010100 +// CHECK-INST: zip { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x14,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e114 + +zip {z20.b - z23.b}, {z12.b - z15.b} // 11000001-00110110-11100001-10010100 +// CHECK-INST: zip { z20.b - z23.b }, { z12.b - z15.b } +// CHECK-ENCODING: [0x94,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e194 + +zip {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00110110-11100011-10011100 +// CHECK-INST: zip { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x9c,0xe3,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e39c + From 304f1d59ca41872c094def3aee0a8689df6aa398 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 2 Nov 2022 16:46:56 +0100 Subject: [PATCH 234/516] [IR] Switch everything to use memory attribute This switches everything to use the memory attribute proposed in https://discourse.llvm.org/t/rfc-unify-memory-effect-attributes/65579. The old argmemonly, inaccessiblememonly and inaccessiblemem_or_argmemonly attributes are dropped. The readnone, readonly and writeonly attributes are restricted to parameters only. 
The old attributes are auto-upgraded both in bitcode and IR. The bitcode upgrade is a policy requirement that has to be retained indefinitely. The IR upgrade is mainly there so it's not necessary to update all tests using memory attributes in this patch, which is already large enough. We could drop that part after migrating tests, or retain it longer term, to make it easier to import IR from older LLVM versions. High-level Function/CallBase APIs like doesNotAccessMemory() or setDoesNotAccessMemory() are mapped transparently to the memory attribute. Code that directly manipulates attributes (e.g. via AttributeList) on the other hand needs to switch to working with the memory attribute instead. Differential Revision: https://reviews.llvm.org/D135780 --- clang/lib/CodeGen/CGCall.cpp | 25 +- clang/lib/CodeGen/CGObjCMac.cpp | 11 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 5 +- clang/test/CodeGen/asm-attrs.c | 4 +- clang/test/CodeGen/builtin-sqrt.c | 4 +- clang/test/CodeGen/complex-builtins.c | 4 +- clang/test/CodeGen/complex-libcalls.c | 4 +- clang/test/CodeGen/function-attributes.c | 4 +- clang/test/CodeGen/libcall-declarations.c | 8 +- clang/test/CodeGen/libcalls.c | 4 +- clang/test/CodeGen/math-builtins.c | 16 +- clang/test/CodeGen/math-libcalls.c | 18 +- clang/test/CodeGen/ms-declspecs.c | 2 +- clang/test/CodeGen/pragma-weak.c | 2 +- clang/test/CodeGen/struct-passing.c | 4 +- .../2009-05-04-PureConstNounwind.cpp | 8 +- clang/test/CodeGenCXX/dynamic-cast.cpp | 2 +- clang/test/CodeGenCXX/threadlocal_address.cpp | 2 +- clang/test/CodeGenObjC/class-stubs.m | 2 +- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 2 +- .../CodeGenOpenCL/fdeclare-opencl-builtins.cl | 4 +- clang/test/OpenMP/barrier_codegen.cpp | 2 +- clang/test/OpenMP/irbuilder_simd_aligned.cpp | 2 +- clang/test/Sema/libbuiltins-ctype-powerpc64.c | 4 +- clang/test/Sema/libbuiltins-ctype-x86_64.c | 4 +- llvm/docs/LangRef.rst | 102 ++--- llvm/docs/ReleaseNotes.rst | 21 ++ llvm/include/llvm/AsmParser/LLToken.h | 5 + 
.../include/llvm/Frontend/OpenMP/OMPKinds.def | 71 ++-- llvm/include/llvm/IR/Attributes.td | 17 +- llvm/include/llvm/IR/Function.h | 49 +-- llvm/include/llvm/IR/InstrTypes.h | 106 +----- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 24 +- llvm/lib/AsmParser/LLLexer.cpp | 3 + llvm/lib/AsmParser/LLParser.cpp | 39 +- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 90 ++++- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 6 - llvm/lib/CodeGen/MachineVerifier.cpp | 8 +- llvm/lib/IR/Function.cpp | 60 +++ llvm/lib/IR/Instructions.cpp | 82 +++- llvm/lib/IR/Verifier.cpp | 22 -- llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp | 4 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 +- llvm/lib/Target/Mips/Mips16HardFloat.cpp | 4 +- .../Transforms/IPO/AttributorAttributes.cpp | 163 +++++--- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 80 +--- .../Instrumentation/DataFlowSanitizer.cpp | 19 +- .../Instrumentation/MemorySanitizer.cpp | 15 +- .../Scalar/PartiallyInlineLibCalls.cpp | 7 +- .../Scalar/RewriteStatepointsForGC.cpp | 5 +- llvm/lib/Transforms/Scalar/SCCP.cpp | 25 +- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 5 - llvm/lib/Transforms/Utils/CodeExtractor.cpp | 9 +- llvm/test/Analysis/BasicAA/cs-cs.ll | 24 +- llvm/test/Analysis/BasicAA/intrinsics-arm.ll | 4 +- llvm/test/Analysis/BasicAA/intrinsics.ll | 4 +- llvm/test/Analysis/BasicAA/pure-const-dce.ll | 4 +- .../TypeBasedAliasAnalysis/functionattrs.ll | 14 +- .../TypeBasedAliasAnalysis/intrinsics.ll | 4 +- .../aarch64-intrinsics-attributes.ll | 2 +- ...masked-load-store-intrinsics-attributes.ll | 6 +- llvm/test/Bindings/llvm-c/debug_info.ll | 6 +- llvm/test/Bitcode/attributes-3.3.ll | 4 +- llvm/test/Bitcode/attributes.ll | 12 +- llvm/test/Bitcode/compatibility-3.6.ll | 14 +- llvm/test/Bitcode/compatibility-3.7.ll | 14 +- llvm/test/Bitcode/compatibility-3.8.ll | 18 +- llvm/test/Bitcode/compatibility-3.9.ll | 20 +- llvm/test/Bitcode/compatibility-4.0.ll | 20 +- llvm/test/Bitcode/compatibility-5.0.ll | 22 +- llvm/test/Bitcode/compatibility-6.0.ll 
| 20 +- llvm/test/Bitcode/compatibility.ll | 20 +- llvm/test/Bitcode/ptest-new.ll | 2 +- llvm/test/Bitcode/ptest-old.ll | 2 +- llvm/test/Bitcode/upgrade-frame-pointer.ll | 4 +- .../upgrade-invariant-group-barrier.ll | 4 +- .../AMDGPU/addrspacecast-constantexpr.ll | 4 +- .../annotate-kernel-features-hsa-call.ll | 4 +- .../AMDGPU/annotate-kernel-features-hsa.ll | 4 +- .../AMDGPU/annotate-kernel-features.ll | 4 +- llvm/test/CodeGen/AMDGPU/inline-attr.ll | 12 +- .../AMDGPU/pal-simple-indirect-call.ll | 4 +- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 2 +- .../uniform-work-group-recursion-test.ll | 4 +- .../Feature/OperandBundles/function-attrs.ll | 6 +- llvm/test/Feature/intrinsics.ll | 2 +- .../DataFlowSanitizer/basic.ll | 4 +- .../MemorySanitizer/attributes.ll | 7 +- llvm/test/Other/attribute-comment.ll | 2 +- llvm/test/Other/cgscc-devirt-iteration.ll | 14 +- .../Other/cgscc-iterate-function-mutation.ll | 2 +- llvm/test/Other/invariant.group.ll | 4 +- llvm/test/Other/opt-override-mcpu-mattr.ll | 4 +- llvm/test/Other/print-module-scope.ll | 4 +- .../2008-02-01-ReturnAttrs.ll | 14 +- .../2008-07-02-array-indexing.ll | 16 +- .../ArgumentPromotion/2008-09-07-CGUpdate.ll | 12 +- .../2008-09-08-CGUpdateSelfEdge.ll | 6 +- .../ArgumentPromotion/X86/attributes.ll | 28 +- .../X86/min-legal-vector-width.ll | 72 ++-- .../ArgumentPromotion/aggregate-promote.ll | 14 +- .../Attributor/ArgumentPromotion/alignment.ll | 18 +- .../Attributor/ArgumentPromotion/attrs.ll | 12 +- .../Attributor/ArgumentPromotion/basictest.ll | 18 +- .../Attributor/ArgumentPromotion/byval-2.ll | 12 +- .../Attributor/ArgumentPromotion/byval.ll | 30 +- .../Attributor/ArgumentPromotion/chained.ll | 16 +- .../ArgumentPromotion/control-flow.ll | 16 +- .../ArgumentPromotion/control-flow2.ll | 14 +- .../Attributor/ArgumentPromotion/crash.ll | 34 +- .../Attributor/ArgumentPromotion/fp80.ll | 20 +- .../Attributor/ArgumentPromotion/inalloca.ll | 26 +- .../ArgumentPromotion/invalidation.ll | 6 +- 
.../live_called_from_dead.ll | 20 +- .../live_called_from_dead_2.ll | 24 +- .../Attributor/ArgumentPromotion/musttail.ll | 67 ++-- .../pr33641_remove_arg_dbgvalue.ll | 14 +- .../Attributor/ArgumentPromotion/sret.ll | 18 +- .../IPConstantProp/2009-09-24-byval-ptr.ll | 16 +- .../Attributor/IPConstantProp/PR16052.ll | 22 +- .../Attributor/IPConstantProp/PR26044.ll | 14 +- .../Attributor/IPConstantProp/PR43857.ll | 12 +- .../IPConstantProp/arg-count-mismatch.ll | 22 +- .../IPConstantProp/arg-type-mismatch.ll | 10 +- .../Attributor/IPConstantProp/comdat-ipo.ll | 6 +- .../IPConstantProp/dangling-block-address.ll | 16 +- .../Attributor/IPConstantProp/deadarg.ll | 4 +- .../IPConstantProp/fp-bc-icmp-const-fold.ll | 4 +- .../Attributor/IPConstantProp/global.ll | 6 +- .../IPConstantProp/multiple_callbacks.ll | 18 +- .../IPConstantProp/musttail-call.ll | 4 +- .../Attributor/IPConstantProp/pthreads.ll | 10 +- .../Attributor/IPConstantProp/recursion.ll | 10 +- .../IPConstantProp/remove-call-inst.ll | 14 +- .../IPConstantProp/return-argument.ll | 23 +- .../IPConstantProp/return-constant.ll | 24 +- .../IPConstantProp/return-constants.ll | 22 +- ...fter-each-resolving-undefs-for-function.ll | 16 +- .../IPConstantProp/thread_local_acs.ll | 4 +- llvm/test/Transforms/Attributor/align.ll | 151 ++++---- llvm/test/Transforms/Attributor/allow_list.ll | 8 +- .../Transforms/Attributor/alwaysinline.ll | 14 +- .../Attributor/call-simplify-pointer-info.ll | 26 +- .../Attributor/cb_liveness_disabled.ll | 10 +- .../Attributor/cb_liveness_enabled.ll | 10 +- .../Attributor/cb_range_disabled.ll | 10 +- .../Transforms/Attributor/cb_range_enabled.ll | 10 +- llvm/test/Transforms/Attributor/depgraph.ll | 6 +- .../Attributor/dereferenceable-1.ll | 212 +++++++---- .../dereferenceable-2-inseltpoison.ll | 64 ++-- .../Attributor/dereferenceable-2.ll | 64 ++-- .../Transforms/Attributor/heap_to_stack.ll | 4 +- .../Attributor/heap_to_stack_gpu.ll | 2 +- .../Transforms/Attributor/internal-noalias.ll | 60 +-- 
.../test/Transforms/Attributor/internalize.ll | 6 +- llvm/test/Transforms/Attributor/liveness.ll | 165 ++++---- .../Transforms/Attributor/liveness_chains.ll | 11 +- llvm/test/Transforms/Attributor/lowerheap.ll | 2 +- .../Attributor/lvi-after-jumpthreading.ll | 8 +- .../Transforms/Attributor/lvi-for-ashr.ll | 4 +- .../Transforms/Attributor/memory_locations.ll | 134 +++---- llvm/test/Transforms/Attributor/misc.ll | 13 +- llvm/test/Transforms/Attributor/misc_crash.ll | 12 +- llvm/test/Transforms/Attributor/noalias.ll | 141 ++++--- .../test/Transforms/Attributor/nocapture-1.ll | 224 ++++++----- .../test/Transforms/Attributor/nocapture-2.ll | 355 +++++++++++------- llvm/test/Transforms/Attributor/nodelete.ll | 20 +- llvm/test/Transforms/Attributor/nofree.ll | 58 ++- llvm/test/Transforms/Attributor/nonnull.ll | 109 +++--- llvm/test/Transforms/Attributor/norecurse.ll | 80 ++-- llvm/test/Transforms/Attributor/noreturn.ll | 38 +- .../Transforms/Attributor/noreturn_async.ll | 2 +- .../Transforms/Attributor/noreturn_sync.ll | 2 +- llvm/test/Transforms/Attributor/nosync.ll | 62 ++- llvm/test/Transforms/Attributor/nounwind.ll | 16 +- .../Transforms/Attributor/openmp_parallel.ll | 7 +- .../Transforms/Attributor/pointer-info.ll | 18 +- llvm/test/Transforms/Attributor/potential.ll | 108 +++--- llvm/test/Transforms/Attributor/range.ll | 292 +++++++------- .../read_write_returned_arguments_scc.ll | 47 +-- llvm/test/Transforms/Attributor/readattrs.ll | 153 ++++---- llvm/test/Transforms/Attributor/returned.ll | 138 ++++--- .../Attributor/undefined_behavior.ll | 180 ++++----- .../Attributor/value-simplify-assume.ll | 115 +++--- .../Attributor/value-simplify-dbg.ll | 6 +- .../Attributor/value-simplify-gpu.ll | 42 +-- .../Attributor/value-simplify-instances.ll | 81 ++-- .../Attributor/value-simplify-local-remote.ll | 118 +++--- .../value-simplify-pointer-info-struct.ll | 12 +- .../Attributor/value-simplify-pointer-info.ll | 144 ++++--- .../Transforms/Attributor/value-simplify.ll | 219 
++++++----- llvm/test/Transforms/Attributor/willreturn.ll | 300 +++++++++------ llvm/test/Transforms/Attributor/wrapper.ll | 4 +- .../Transforms/Coroutines/coro-readnone-02.ll | 2 +- .../DeadArgElim/2010-04-30-DbgInfo.ll | 2 +- .../FunctionAttrs/2008-09-03-Mutual.ll | 4 +- .../FunctionAttrs/2008-09-03-ReadNone.ll | 6 +- .../FunctionAttrs/2008-09-03-ReadOnly.ll | 2 +- .../FunctionAttrs/2008-12-29-Constant.ll | 2 +- .../Transforms/FunctionAttrs/argmemonly.ll | 42 +-- llvm/test/Transforms/FunctionAttrs/atomic.ll | 4 +- .../Transforms/FunctionAttrs/convergent.ll | 8 +- .../FunctionAttrs/incompatible_fn_attrs.ll | 6 +- .../FunctionAttrs/int_sideeffect.ll | 4 +- .../FunctionAttrs/nofree-attributor.ll | 14 +- llvm/test/Transforms/FunctionAttrs/nofree.ll | 6 +- .../Transforms/FunctionAttrs/norecurse.ll | 28 +- llvm/test/Transforms/FunctionAttrs/nosync.ll | 38 +- .../test/Transforms/FunctionAttrs/nounwind.ll | 6 +- llvm/test/Transforms/FunctionAttrs/optnone.ll | 2 +- .../Transforms/FunctionAttrs/readattrs.ll | 32 +- llvm/test/Transforms/FunctionAttrs/stats.ll | 4 +- .../FunctionAttrs/willreturn-callsites.ll | 2 +- .../Transforms/FunctionAttrs/willreturn.ll | 18 +- .../Transforms/FunctionAttrs/writeonly.ll | 28 +- llvm/test/Transforms/GlobalOpt/ctor-memset.ll | 2 +- llvm/test/Transforms/GlobalOpt/pr54572.ll | 2 +- .../Transforms/InferFunctionAttrs/annotate.ll | 34 +- .../InferFunctionAttrs/norecurse_debug.ll | 2 +- .../readonly_and_writeonly.ll | 2 +- llvm/test/Transforms/Inline/cgscc-update.ll | 28 +- llvm/test/Transforms/Inline/inline_invoke.ll | 4 +- .../AArch64/2012-04-23-Neon-Intrinsics.ll | 4 +- llvm/test/Transforms/InstCombine/stpncpy-1.ll | 4 +- llvm/test/Transforms/LICM/scalar-promote.ll | 12 +- llvm/test/Transforms/LICM/strlen.ll | 2 +- .../strided-store-double.ll | 2 +- llvm/test/Transforms/ObjCARC/basic.ll | 2 +- ...e-that-exception-unwind-path-is-visited.ll | 2 +- llvm/test/Transforms/ObjCARC/nested.ll | 2 +- llvm/test/Transforms/ObjCARC/rle-s2l.ll | 2 +- 
llvm/test/Transforms/OpenMP/add_attributes.ll | 136 +++---- .../Transforms/OpenMP/parallel_deletion.ll | 12 +- .../Transforms/OpenMP/remove_globalization.ll | 14 +- .../OpenMP/replace_globalization.ll | 25 +- llvm/test/Transforms/OpenMP/spmdization.ll | 48 +-- .../Transforms/OpenMP/spmdization_assumes.ll | 2 +- .../Transforms/OpenMP/spmdization_guarding.ll | 8 +- ...mdization_guarding_two_reaching_kernels.ll | 26 +- .../X86/intrinsic-attributes.ll | 2 +- .../statepoint-attrs.ll | 2 +- .../Transforms/SCCP/ipscp-drop-argmemonly.ll | 31 +- llvm/test/Transforms/SCCP/remove-call-inst.ll | 2 +- .../SampleProfile/pseudo-probe-emit.ll | 2 +- ...patible-invokes-of-landingpad-debuginfo.ll | 2 +- .../merge-compatible-invokes-of-landingpad.ll | 2 +- .../SimplifyCFG/preserve-branchweights.ll | 2 +- llvm/test/Verifier/fp-intrinsics.ll | 2 +- llvm/test/Verifier/writeonly.ll | 13 - .../Inputs/check_attrs.ll.funcattrs.expected | 2 +- ...ious_ir_values.ll.funcsig.globals.expected | 4 +- .../remove-attributes-from-intrinsics.ll | 2 +- llvm/unittests/IR/InstructionsTest.cpp | 4 +- llvm/utils/TableGen/IntrinsicEmitter.cpp | 46 +-- llvm/utils/UpdateTestChecks/common.py | 2 +- mlir/test/Target/LLVMIR/llvmir.mlir | 2 +- 256 files changed, 3686 insertions(+), 3501 deletions(-) delete mode 100644 llvm/test/Verifier/writeonly.ll diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 8cea9f3397b67..b7d6ea2d3cc62 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2121,6 +2121,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // The NoBuiltinAttr attached to the target FunctionDecl. const NoBuiltinAttr *NBA = nullptr; + // Some ABIs may result in additional accesses to arguments that may + // otherwise not be present. 
+ auto AddPotentialArgAccess = [&]() { + llvm::Attribute A = FuncAttrs.getAttribute(llvm::Attribute::Memory); + if (A.isValid()) + FuncAttrs.addMemoryAttr(A.getMemoryEffects() | + llvm::MemoryEffects::argMemOnly()); + }; + // Collect function IR attributes based on declaration-specific // information. // FIXME: handle sseregparm someday... @@ -2167,18 +2176,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // 'const', 'pure' and 'noalias' attributed functions are also nounwind. if (TargetDecl->hasAttr()) { - FuncAttrs.addAttribute(llvm::Attribute::ReadNone); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::none()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // gcc specifies that 'const' functions have greater restrictions than // 'pure' functions, so they also cannot have infinite loops. FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr()) { - FuncAttrs.addAttribute(llvm::Attribute::ReadOnly); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // gcc specifies that 'pure' functions cannot have infinite loops. FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr()) { - FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::argMemOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } if (TargetDecl->hasAttr()) @@ -2356,8 +2365,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { // inalloca and sret disable readnone and readonly - FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); + AddPotentialArgAccess(); break; } @@ -2527,9 +2535,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, Attrs.addAlignmentAttr(Align.getQuantity()); // byval disables readnone and readonly. 
- FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); - + AddPotentialArgAccess(); break; } case ABIArgInfo::IndirectAliased: { @@ -2545,8 +2551,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, case ABIArgInfo::InAlloca: // inalloca disables readnone and readonly. - FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); + AddPotentialArgAccess(); continue; } diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index c3b8e6e8afa91..92eadb39e6ded 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -737,14 +737,17 @@ class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { // Also it is safe to make it readnone, since we never load or store the // classref except by calling this function. llvm::Type *params[] = { Int8PtrPtrTy }; + llvm::LLVMContext &C = CGM.getLLVMContext(); + llvm::AttributeSet AS = llvm::AttributeSet::get(C, { + llvm::Attribute::get(C, llvm::Attribute::NonLazyBind), + llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none()), + llvm::Attribute::get(C, llvm::Attribute::NoUnwind), + }); llvm::FunctionCallee F = CGM.CreateRuntimeFunction( llvm::FunctionType::get(ClassnfABIPtrTy, params, false), "objc_loadClassref", llvm::AttributeList::get(CGM.getLLVMContext(), - llvm::AttributeList::FunctionIndex, - {llvm::Attribute::NonLazyBind, - llvm::Attribute::ReadNone, - llvm::Attribute::NoUnwind})); + llvm::AttributeList::FunctionIndex, AS)); if (!CGM.getTriple().isOSBinFormatCOFF()) cast(F.getCallee())->setLinkage( llvm::Function::ExternalWeakLinkage); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index f8491d3f59ac7..5f7c739a76786 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1325,8 +1325,9 @@ static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) { 
llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false); // Mark the function as nounwind readonly. - llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, - llvm::Attribute::ReadOnly }; + llvm::AttrBuilder FuncAttrs(CGF.getLLVMContext()); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly()); llvm::AttributeList Attrs = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); diff --git a/clang/test/CodeGen/asm-attrs.c b/clang/test/CodeGen/asm-attrs.c index fa07601cc01ed..6d95e10d0af0b 100644 --- a/clang/test/CodeGen/asm-attrs.c +++ b/clang/test/CodeGen/asm-attrs.c @@ -10,9 +10,9 @@ // CHECK: call void asm sideeffect "foo7", {{.*}} [[NOATTRS]] // CHECK: call i32 asm "foo8", {{.*}} [[READNONE]] -// CHECK: attributes [[READNONE]] = { nounwind readnone } +// CHECK: attributes [[READNONE]] = { nounwind memory(none) } // CHECK: attributes [[NOATTRS]] = { nounwind } -// CHECK: attributes [[READONLY]] = { nounwind readonly } +// CHECK: attributes [[READONLY]] = { nounwind memory(read) } int g0, g1; diff --git a/clang/test/CodeGen/builtin-sqrt.c b/clang/test/CodeGen/builtin-sqrt.c index 32300085682bd..2313a68d2d0e2 100644 --- a/clang/test/CodeGen/builtin-sqrt.c +++ b/clang/test/CodeGen/builtin-sqrt.c @@ -8,8 +8,8 @@ float foo(float X) { } // HAS_ERRNO: declare float @sqrtf(float noundef) [[ATTR:#[0-9]+]] -// HAS_ERRNO-NOT: attributes [[ATTR]] = {{{.*}} readnone +// HAS_ERRNO-NOT: attributes [[ATTR]] = {{{.*}} memory(none) // NO_ERRNO: declare float @llvm.sqrt.f32(float) [[ATTR:#[0-9]+]] -// NO_ERRNO: attributes [[ATTR]] = { nocallback nofree nosync nounwind readnone {{.*}}} +// NO_ERRNO: attributes [[ATTR]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/clang/test/CodeGen/complex-builtins.c b/clang/test/CodeGen/complex-builtins.c index 5dc5424ae9316..29d6e7ba909f6 100644 --- 
a/clang/test/CodeGen/complex-builtins.c +++ b/clang/test/CodeGen/complex-builtins.c @@ -197,9 +197,9 @@ void foo(float f) { // HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl(ptr noundef byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // HAS_ERRNO: attributes [[WILLRETURN_NOT_READNONE]] = { nounwind willreturn {{.*}} } diff --git a/clang/test/CodeGen/complex-libcalls.c b/clang/test/CodeGen/complex-libcalls.c index 3a197a2f2005b..7d4e3d04cf64a 100644 --- a/clang/test/CodeGen/complex-libcalls.c +++ b/clang/test/CodeGen/complex-libcalls.c @@ -197,9 +197,9 @@ void foo(float f) { // HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl(ptr noundef byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // HAS_ERRNO: attributes [[WILLRETURN_NOT_READNONE]] = { nounwind willreturn {{.*}} } diff --git a/clang/test/CodeGen/function-attributes.c b/clang/test/CodeGen/function-attributes.c index 408a4f16f9b6c..38a0fb03de1e2 100644 --- a/clang/test/CodeGen/function-attributes.c +++ b/clang/test/CodeGen/function-attributes.c @@ -111,9 +111,9 @@ void f20(void) { // CHECK: attributes [[NUW]] = { nounwind optsize{{.*}} } // CHECK: attributes [[AI]] = { alwaysinline nounwind 
optsize{{.*}} } -// CHECK: attributes [[NUW_OS_RN]] = { nounwind optsize readnone{{.*}} } +// CHECK: attributes [[NUW_OS_RN]] = { nounwind optsize willreturn memory(none){{.*}} } // CHECK: attributes [[SR]] = { nounwind optsize{{.*}} "stackrealign"{{.*}} } // CHECK: attributes [[RT]] = { nounwind optsize returns_twice{{.*}} } // CHECK: attributes [[NR]] = { noreturn optsize } -// CHECK: attributes [[NUW_RN]] = { nounwind optsize readnone willreturn } +// CHECK: attributes [[NUW_RN]] = { nounwind optsize willreturn memory(none) } // CHECK: attributes [[RT_CALL]] = { optsize returns_twice } diff --git a/clang/test/CodeGen/libcall-declarations.c b/clang/test/CodeGen/libcall-declarations.c index e39263c3d5c4c..ebdb05d7ff109 100644 --- a/clang/test/CodeGen/libcall-declarations.c +++ b/clang/test/CodeGen/libcall-declarations.c @@ -614,8 +614,8 @@ void *use[] = { // CHECK-ERRNO: declare { double, double } @ctanh(double noundef, double noundef) [[NONCONST]] // CHECK-ERRNO: declare <2 x float> @ctanhf(<2 x float> noundef) [[NONCONST]] -// CHECK-NOERRNO: attributes [[NUWRN]] = { nounwind readnone{{.*}} } -// CHECK-NOERRNO: attributes [[NUWRO]] = { nounwind readonly{{.*}} } +// CHECK-NOERRNO: attributes [[NUWRN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK-NOERRNO: attributes [[NUWRO]] = { nounwind willreturn memory(read){{.*}} } -// CHECK-ERRNO: attributes [[NUWRN]] = { nounwind readnone{{.*}} } -// CHECK-ERRNO: attributes [[NUWRO]] = { nounwind readonly{{.*}} } +// CHECK-ERRNO: attributes [[NUWRN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK-ERRNO: attributes [[NUWRO]] = { nounwind willreturn memory(read){{.*}} } diff --git a/clang/test/CodeGen/libcalls.c b/clang/test/CodeGen/libcalls.c index 8313b9a742df8..42b6df4ccbe4a 100644 --- a/clang/test/CodeGen/libcalls.c +++ b/clang/test/CodeGen/libcalls.c @@ -124,5 +124,5 @@ void test_builtins(double d, float f, long double ld) { } // CHECK-YES: attributes [[NUW]] = { nounwind "frame-pointer"="none" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } -// CHECK-NO-DAG: attributes [[NUW_RN]] = { nounwind readnone{{.*}} } -// CHECK-NO-DAG: attributes [[NUW_RNI]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +// CHECK-NO-DAG: attributes [[NUW_RN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK-NO-DAG: attributes [[NUW_RNI]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/clang/test/CodeGen/math-builtins.c b/clang/test/CodeGen/math-builtins.c index 965ed77b54be3..559421a4882c5 100644 --- a/clang/test/CodeGen/math-builtins.c +++ b/clang/test/CodeGen/math-builtins.c @@ -680,16 +680,16 @@ __builtin_trunc(f); __builtin_truncf(f); __builtin_truncl(f); __builtin // HAS_ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } -// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } +// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// NO__ERRNO: attributes [[PURE]] = { {{.*}}readonly{{.*}} } +// NO__ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO: attributes [[PURE]] = { {{.*}}readonly{{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// HAS_ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } -// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// 
HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c index 52a08d2652817..0e61f92f8c751 100644 --- a/clang/test/CodeGen/math-libcalls.c +++ b/clang/test/CodeGen/math-libcalls.c @@ -704,18 +704,18 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) { // HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } -// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } +// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// NO__ERRNO: attributes [[READONLY]] = { {{.*}}readonly{{.*}} } +// NO__ERRNO: attributes [[READONLY]] = { {{.*}}memory(read){{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO: attributes [[READONLY]] = { {{.*}}readonly{{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// HAS_ERRNO: attributes [[READONLY]] = { {{.*}}memory(read){{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // HAS_MAYTRAP: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_MAYTRAP: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_MAYTRAP: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } -// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// 
HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } diff --git a/clang/test/CodeGen/ms-declspecs.c b/clang/test/CodeGen/ms-declspecs.c index ff9a143f241ec..e390dddbe2b47 100644 --- a/clang/test/CodeGen/ms-declspecs.c +++ b/clang/test/CodeGen/ms-declspecs.c @@ -41,4 +41,4 @@ void noalias_caller(int *x) { noalias_callee(x); } // CHECK: attributes [[NUW]] = { nounwind{{.*}} } // CHECK: attributes [[NI]] = { noinline nounwind{{.*}} } // CHECK: attributes [[NR]] = { noreturn } -// CHECK: attributes [[NA]] = { argmemonly nounwind{{.*}} } +// CHECK: attributes [[NA]] = { nounwind memory(argmem: readwrite){{.*}} } diff --git a/clang/test/CodeGen/pragma-weak.c b/clang/test/CodeGen/pragma-weak.c index 306ce306f4640..52328bf9ff1be 100644 --- a/clang/test/CodeGen/pragma-weak.c +++ b/clang/test/CodeGen/pragma-weak.c @@ -202,4 +202,4 @@ void zzz(void){} int correct_linkage; // CHECK: attributes [[NI]] = { noinline nounwind{{.*}} } -// CHECK: attributes [[RN]] = { noinline nounwind optnone readnone{{.*}} } +// CHECK: attributes [[RN]] = { noinline nounwind optnone willreturn memory(none){{.*}} } diff --git a/clang/test/CodeGen/struct-passing.c b/clang/test/CodeGen/struct-passing.c index ad7b813320849..c8cfeb9c8168a 100644 --- a/clang/test/CodeGen/struct-passing.c +++ b/clang/test/CodeGen/struct-passing.c @@ -23,5 +23,5 @@ void *ps[] = { f0, f1, f2, f3, f4, f5 }; // CHECK: declare void @f4({{.*}} byval({{.*}}) align 4) // CHECK: declare void @f5({{.*}} byval({{.*}}) align 4) -// CHECK: attributes [[RN]] = { nounwind readnone{{.*}} } -// CHECK: attributes [[RO]] = { nounwind readonly{{.*}} } +// CHECK: attributes [[RN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK: attributes [[RO]] = { nounwind willreturn memory(read){{.*}} } diff --git a/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp b/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp index 99d172239daa3..cf95b54fb8c07 100644 --- 
a/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp +++ b/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp @@ -15,8 +15,8 @@ int f(void) { // CHECK: declare noundef i32 @_Z1tv() [[TF2:#[0-9]+]] // CHECK: attributes [[TF]] = { {{.*}} } -// CHECK: attributes [[NUW_RN]] = { nounwind readnone willreturn{{.*}} } -// CHECK: attributes [[NUW_RO]] = { nounwind readonly willreturn{{.*}} } +// CHECK: attributes [[NUW_RN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK: attributes [[NUW_RO]] = { nounwind willreturn memory(read){{.*}} } // CHECK: attributes [[TF2]] = { {{.*}} } -// CHECK: attributes [[NUW_RN_CALL]] = { nounwind readnone willreturn } -// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn } +// CHECK: attributes [[NUW_RN_CALL]] = { nounwind willreturn memory(none) } +// CHECK: attributes [[NUW_RO_CALL]] = { nounwind willreturn memory(read) } diff --git a/clang/test/CodeGenCXX/dynamic-cast.cpp b/clang/test/CodeGenCXX/dynamic-cast.cpp index 86e0f62bc9f98..1d36376a55bc7 100644 --- a/clang/test/CodeGenCXX/dynamic-cast.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast.cpp @@ -20,5 +20,5 @@ const B& f(A *a) { // CHECK: declare ptr @__dynamic_cast(ptr, ptr, ptr, i64) [[NUW_RO:#[0-9]+]] -// CHECK: attributes [[NUW_RO]] = { nounwind readonly } +// CHECK: attributes [[NUW_RO]] = { nounwind memory(read) } // CHECK: attributes [[NR]] = { noreturn } diff --git a/clang/test/CodeGenCXX/threadlocal_address.cpp b/clang/test/CodeGenCXX/threadlocal_address.cpp index cb63bc2759906..0ae58ab550029 100644 --- a/clang/test/CodeGenCXX/threadlocal_address.cpp +++ b/clang/test/CodeGenCXX/threadlocal_address.cpp @@ -51,4 +51,4 @@ int f() { // CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[J_ADDR]] // CHECK-O1-NEXT: ret i32 %[[INC]] // -// CHECK: attributes #[[ATTR_NUM]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +// CHECK: attributes #[[ATTR_NUM]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git 
a/clang/test/CodeGenObjC/class-stubs.m b/clang/test/CodeGenObjC/class-stubs.m index 5cd3d575596cb..d73b541dd97ba 100644 --- a/clang/test/CodeGenObjC/class-stubs.m +++ b/clang/test/CodeGenObjC/class-stubs.m @@ -81,4 +81,4 @@ - (void) anotherInstanceMethod { @end // -- calls to objc_loadClassRef() are readnone -// CHECK: attributes [[ATTRLIST]] = { nounwind nonlazybind readnone } +// CHECK: attributes [[ATTRLIST]] = { nounwind nonlazybind memory(none) } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 9696f3536e2f6..600194e5e6c13 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -796,7 +796,7 @@ kernel void test_s_setreg(uint val) { // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} -// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nofree nounwind readonly } +// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nofree nounwind memory(read) } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } // CHECK-DAG: ![[$EXEC]] = !{!"exec"} // CHECK-DAG: ![[$EXEC_LO]] = !{!"exec_lo"} diff --git a/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl b/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl index f0ebf6e3c0eac..ac3bff9dbde27 100644 --- a/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl +++ b/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl @@ -49,6 +49,6 @@ void test_generic_optionality(float a, float *b) { } // CHECK: attributes [[ATTR_CONST]] = -// CHECK-SAME: readnone +// CHECK-SAME: memory(none) // CHECK: attributes [[ATTR_PURE]] = -// CHECK-SAME: readonly +// CHECK-SAME: memory(read) diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp index 0b2de036ce119..e06503077d66b 100644 --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -45,7 +45,7 @@ int main(int argc, char **argv) { // 
CLANGCG: declare i32 @__kmpc_global_thread_num(ptr) // IRBUILDER: ; Function Attrs: nounwind // IRBUILDER-NEXT: declare i32 @__kmpc_global_thread_num(ptr) # -// IRBUILDER_OPT: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +// IRBUILDER_OPT: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) // IRBUILDER_OPT-NEXT: declare i32 @__kmpc_global_thread_num(ptr nocapture nofree readonly) # // CHECK: define {{.+}} [[TMAIN_INT]]( diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp index 6af2f7385e62e..e749cb9cfa778 100644 --- a/clang/test/OpenMP/irbuilder_simd_aligned.cpp +++ b/clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -164,7 +164,7 @@ void simple(float *a, float *b, int *c) { //. // CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } // CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +// CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } //. 
// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 50} diff --git a/clang/test/Sema/libbuiltins-ctype-powerpc64.c b/clang/test/Sema/libbuiltins-ctype-powerpc64.c index 6a5e6031c5ee2..fce9f5c0716e9 100644 --- a/clang/test/Sema/libbuiltins-ctype-powerpc64.c +++ b/clang/test/Sema/libbuiltins-ctype-powerpc64.c @@ -61,5 +61,5 @@ void test(int x) { // CHECK: declare signext i32 @tolower(i32 noundef signext) [[NUW_RO:#[0-9]+]] // CHECK: declare signext i32 @toupper(i32 noundef signext) [[NUW_RO:#[0-9]+]] -// CHECK: attributes [[NUW_RO]] = { nounwind readonly{{.*}} } -// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn } +// CHECK: attributes [[NUW_RO]] = { nounwind willreturn memory(read){{.*}} } +// CHECK: attributes [[NUW_RO_CALL]] = { nounwind willreturn memory(read) } diff --git a/clang/test/Sema/libbuiltins-ctype-x86_64.c b/clang/test/Sema/libbuiltins-ctype-x86_64.c index ed6e31e734343..0d182d7095aac 100644 --- a/clang/test/Sema/libbuiltins-ctype-x86_64.c +++ b/clang/test/Sema/libbuiltins-ctype-x86_64.c @@ -61,5 +61,5 @@ void test(int x) { // CHECK: declare i32 @tolower(i32 noundef) [[NUW_RO:#[0-9]+]] // CHECK: declare i32 @toupper(i32 noundef) [[NUW_RO:#[0-9]+]] -// CHECK: attributes [[NUW_RO]] = { nounwind readonly{{.*}} } -// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn } +// CHECK: attributes [[NUW_RO]] = { nounwind willreturn memory(read){{.*}} } +// CHECK: attributes [[NUW_RO_CALL]] = { nounwind willreturn memory(read) } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index c7cfbf12eaa48..5d5916abbfef1 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1414,6 +1414,30 @@ Currently, only the following parameter attributes are defined: same address may be returned), for a free-like function the pointer will always be invalidated. 
+``readnone`` + This attribute indicates that the function does not dereference that + pointer argument, even though it may read or write the memory that the + pointer points to if accessed through other pointers. + + If a function reads from or writes to a readnone pointer argument, the + behavior is undefined. + +``readonly`` + This attribute indicates that the function does not write through this + pointer argument, even though it may write to the memory that the pointer + points to. + + If a function writes to a readonly pointer argument, the behavior is + undefined. + +``writeonly`` + This attribute indicates that the function may write to, but does not read + through this pointer argument (even though it may read from the memory that + the pointer points to). + + If a function reads from a writeonly pointer argument, the behavior is + undefined. + .. _gc: Garbage Collector Strategy Names @@ -1701,22 +1725,6 @@ example: the profile information. By marking a function ``hot``, users can work around the cases where the training input does not have good coverage on all the hot functions. -``inaccessiblememonly`` - This attribute indicates that the function may only access memory that - is not accessible by the module being compiled before return from the - function. This is a weaker form of ``readnone``. If the function reads - or writes other memory, the behavior is undefined. - - For clarity, note that such functions are allowed to return new memory - which is ``noalias`` with respect to memory already accessible from - the module. That is, a function can be both ``inaccessiblememonly`` and - have a ``noalias`` return which introduces a new, potentially initialized, - allocation. -``inaccessiblemem_or_argmemonly`` - This attribute indicates that the function may only access memory that is - either not accessible by the module being compiled, or is pointed to - by its pointer arguments. This is a weaker form of ``argmemonly``. 
If the - function reads or writes other memory, the behavior is undefined. ``inlinehint`` This attribute indicates that the source code contained a hint that inlining this function is desirable (such as the "inline" keyword in @@ -1974,45 +1982,6 @@ example: function that has a ``"probe-stack"`` attribute is inlined into a function that has no ``"probe-stack"`` attribute at all, the resulting function has the ``"probe-stack"`` attribute of the callee. -``readnone`` - On a function, this attribute indicates that the function computes its - result (or decides to unwind an exception) based strictly on its arguments, - without dereferencing any pointer arguments or otherwise accessing - any mutable state (e.g. memory, control registers, etc) visible outside the - ``readnone`` function. It does not write through any pointer arguments - (including ``byval`` arguments) and never changes any state visible to - callers. This means while it cannot unwind exceptions by calling the ``C++`` - exception throwing methods (since they write to memory), there may be - non-``C++`` mechanisms that throw exceptions without writing to LLVM visible - memory. - - On an argument, this attribute indicates that the function does not - dereference that pointer argument, even though it may read or write the - memory that the pointer points to if accessed through other pointers. - - If a readnone function reads or writes memory visible outside the function, - or has other side-effects, the behavior is undefined. If a - function reads from or writes to a readnone pointer argument, the behavior - is undefined. -``readonly`` - On a function, this attribute indicates that the function does not write - through any pointer arguments (including ``byval`` arguments) or otherwise - modify any state (e.g. memory, control registers, etc) visible outside the - ``readonly`` function. It may dereference pointer arguments and read - state that may be set in the caller. 
A readonly function always - returns the same value (or unwinds an exception identically) when - called with the same set of arguments and global state. This means while it - cannot unwind exceptions by calling the ``C++`` exception throwing methods - (since they write to memory), there may be non-``C++`` mechanisms that throw - exceptions without writing to LLVM visible memory. - - On an argument, this attribute indicates that the function does not write - through this pointer argument, even though it may write to the memory that - the pointer points to. - - If a readonly function writes memory visible outside the function, or has - other side-effects, the behavior is undefined. If a function writes to a - readonly pointer argument, the behavior is undefined. ``"stack-probe-size"`` This attribute controls the behavior of stack probes: either the ``"probe-stack"`` attribute, or ABI-required stack probes, if any. @@ -2030,29 +1999,6 @@ example: of the callee. ``"no-stack-arg-probe"`` This attribute disables ABI-required stack probes, if any. -``writeonly`` - On a function, this attribute indicates that the function may write to but - does not read from memory visible outside the ``writeonly`` function. - - On an argument, this attribute indicates that the function may write to but - does not read through this pointer argument (even though it may read from - the memory that the pointer points to). - - If a writeonly function reads memory visible outside the function or has - other side-effects, the behavior is undefined. If a function reads - from a writeonly pointer argument, the behavior is undefined. -``argmemonly`` - This attribute indicates that the only memory accesses inside function are - loads and stores from objects pointed to by its pointer-typed arguments, - with arbitrary offsets. Or in other words, all memory operations in the - function can refer to memory only using pointers based on its function - arguments. 
- - Note that ``argmemonly`` can be used together with ``readonly`` attribute - in order to specify that function reads only from its arguments. - - If an argmemonly function reads or writes memory other than the pointer - arguments, or has other side-effects, the behavior is undefined. ``returns_twice`` This attribute indicates that this function can return twice. The C ``setjmp`` is an example of such a function. The compiler disables diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index d1a9cac4c31a4..465fdc329647d 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -67,6 +67,27 @@ and there is no way to suppress this error. Changes to the LLVM IR ---------------------- +* The ``readnone``, ``readonly``, ``writeonly``, ``argmemonly``, + ``inaccessiblememonly`` and ``inaccessiblemem_or_argmemonly`` function + attributes have been replaced by a single ``memory(...)`` attribute. The + old attributes may be mapped to the new one as follows: + + * ``readnone`` -> ``memory(none)`` + * ``readonly`` -> ``memory(read)`` + * ``writeonly`` -> ``memory(write)`` + * ``argmemonly`` -> ``memory(argmem: readwrite)`` + * ``argmemonly readonly`` -> ``memory(argmem: read)`` + * ``argmemonly writeonly`` -> ``memory(argmem: write)`` + * ``inaccessiblememonly`` -> ``memory(inaccessiblemem: readwrite)`` + * ``inaccessiblememonly readonly`` -> ``memory(inaccessiblemem: read)`` + * ``inaccessiblememonly writeonly`` -> ``memory(inaccessiblemem: write)`` + * ``inaccessiblemem_or_argmemonly`` -> + ``memory(argmem: readwrite, inaccessiblemem: readwrite)`` + * ``inaccessiblemem_or_argmemonly readonly`` -> + ``memory(argmem: read, inaccessiblemem: read)`` + * ``inaccessiblemem_or_argmemonly writeonly`` -> + ``memory(argmem: write, inaccessiblemem: write)`` + * The constant expression variants of the following instructions has been removed: diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 
87df754968265..5fc192f145aaf 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -190,6 +190,11 @@ enum Kind { kw_argmem, kw_inaccessiblemem, + // Legacy memory attributes: + kw_argmemonly, + kw_inaccessiblememonly, + kw_inaccessiblemem_or_argmemonly, + kw_type, kw_opaque, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 71abc8822730a..03964af5893f0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -487,6 +487,7 @@ __OMP_RTL(__last, false, Void, ) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) #define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AllocSizeAttr(N, M) Attribute::getWithAllocSizeArgs(Ctx, N, M) +#define MemoryAttr(ME) Attribute::getWithMemoryEffects(Ctx, ME) #define AttributeSet(...) \ AttributeSet::get(Ctx, ArrayRef({__VA_ARGS__})) @@ -496,27 +497,29 @@ __OMP_RTL(__last, false, Void, ) #define __OMP_ATTRS_SET(VarName, AttrSet) OMP_ATTRS_SET(VarName, AttrSet) -__OMP_ATTRS_SET(GetterAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly), - EnumAttr(NoSync), EnumAttr(NoFree), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn)) - : AttributeSet(EnumAttr(NoUnwind))) -__OMP_ATTRS_SET(GetterArgWriteAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(NoFree), - EnumAttr(InaccessibleMemOrArgMemOnly), - EnumAttr(WillReturn)) - : AttributeSet(EnumAttr(NoUnwind))) -__OMP_ATTRS_SET(SetterAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly), - EnumAttr(NoSync), EnumAttr(NoFree), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn)) - : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + GetterAttrs, + OptimisticAttributes + ? 
AttributeSet( + EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::Ref))) + : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + GetterArgWriteAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly())) + : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + SetterAttrs, + OptimisticAttributes + ? AttributeSet( + EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::Mod))) + : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(DefaultAttrs, OptimisticAttributes @@ -529,12 +532,13 @@ __OMP_ATTRS_SET(BarrierAttrs, ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)) : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))) -__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(InaccessibleMemOrArgMemOnly), - EnumAttr(WillReturn), EnumAttr(NoFree)) - : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + InaccessibleArgOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly())) + : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(AlwaysInlineAttrs, OptimisticAttributes @@ -542,12 +546,13 @@ __OMP_ATTRS_SET(AlwaysInlineAttrs, : AttributeSet(EnumAttr(AlwaysInline))) #if 0 -__OMP_ATTRS_SET(InaccessibleOnlyAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn), EnumAttr(NoFree)) - : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + InaccessibleOnlyAttrs, + OptimisticAttributes + ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleMemOnly())) + : AttributeSet(EnumAttr(NoUnwind))) #endif __OMP_ATTRS_SET(AllocAttrs, diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 595c9197de1dd..75fe534ac61ea 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -64,9 +64,6 @@ def AllocSize : IntAttr<"allocsize", [FnAttr]>; /// inline=always. def AlwaysInline : EnumAttr<"alwaysinline", [FnAttr]>; -/// Function can access memory only using pointers based on its arguments. -def ArgMemOnly : EnumAttr<"argmemonly", [FnAttr]>; - /// Callee is recognized as a builtin, despite nobuiltin attribute on its /// declaration. def Builtin : EnumAttr<"builtin", [FnAttr]>; @@ -106,14 +103,6 @@ def ElementType : TypeAttr<"elementtype", [ParamAttr]>; /// symbol. def FnRetThunkExtern : EnumAttr<"fn_ret_thunk_extern", [FnAttr]>; -/// Function may only access memory that is inaccessible from IR. -def InaccessibleMemOnly : EnumAttr<"inaccessiblememonly", [FnAttr]>; - -/// Function may only access memory that is either inaccessible from the IR, -/// or pointed to by its pointer arguments. -def InaccessibleMemOrArgMemOnly : EnumAttr<"inaccessiblemem_or_argmemonly", - [FnAttr]>; - /// Pass structure in an alloca. def InAlloca : TypeAttr<"inalloca", [ParamAttr]>; @@ -218,10 +207,10 @@ def OptimizeNone : EnumAttr<"optnone", [FnAttr]>; def Preallocated : TypeAttr<"preallocated", [FnAttr, ParamAttr]>; /// Function does not access memory. -def ReadNone : EnumAttr<"readnone", [FnAttr, ParamAttr]>; +def ReadNone : EnumAttr<"readnone", [ParamAttr]>; /// Function only reads from memory. -def ReadOnly : EnumAttr<"readonly", [FnAttr, ParamAttr]>; +def ReadOnly : EnumAttr<"readonly", [ParamAttr]>; /// Return value is always equal to this argument. 
def Returned : EnumAttr<"returned", [ParamAttr]>; @@ -306,7 +295,7 @@ def VScaleRange : IntAttr<"vscale_range", [FnAttr]>; def WillReturn : EnumAttr<"willreturn", [FnAttr]>; /// Function only writes to memory. -def WriteOnly : EnumAttr<"writeonly", [FnAttr, ParamAttr]>; +def WriteOnly : EnumAttr<"writeonly", [ParamAttr]>; /// Zero extended before/after call. def ZExt : EnumAttr<"zeroext", [ParamAttr, RetAttr]>; diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 7945c64c86103..22e98e91d4580 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -491,54 +491,35 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, void setPresplitCoroutine() { addFnAttr(Attribute::PresplitCoroutine); } void setSplittedCoroutine() { removeFnAttr(Attribute::PresplitCoroutine); } + MemoryEffects getMemoryEffects() const; + void setMemoryEffects(MemoryEffects ME); + /// Determine if the function does not access memory. - bool doesNotAccessMemory() const { - return hasFnAttribute(Attribute::ReadNone); - } - void setDoesNotAccessMemory() { - addFnAttr(Attribute::ReadNone); - } + bool doesNotAccessMemory() const; + void setDoesNotAccessMemory(); /// Determine if the function does not access or only reads memory. - bool onlyReadsMemory() const { - return doesNotAccessMemory() || hasFnAttribute(Attribute::ReadOnly); - } - void setOnlyReadsMemory() { - addFnAttr(Attribute::ReadOnly); - } + bool onlyReadsMemory() const; + void setOnlyReadsMemory(); /// Determine if the function does not access or only writes memory. - bool onlyWritesMemory() const { - return doesNotAccessMemory() || hasFnAttribute(Attribute::WriteOnly); - } - void setOnlyWritesMemory() { - addFnAttr(Attribute::WriteOnly); - } + bool onlyWritesMemory() const; + void setOnlyWritesMemory(); /// Determine if the call can access memmory only using pointers based /// on its arguments. 
- bool onlyAccessesArgMemory() const { - return hasFnAttribute(Attribute::ArgMemOnly); - } - void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } + bool onlyAccessesArgMemory() const; + void setOnlyAccessesArgMemory(); /// Determine if the function may only access memory that is /// inaccessible from the IR. - bool onlyAccessesInaccessibleMemory() const { - return hasFnAttribute(Attribute::InaccessibleMemOnly); - } - void setOnlyAccessesInaccessibleMemory() { - addFnAttr(Attribute::InaccessibleMemOnly); - } + bool onlyAccessesInaccessibleMemory() const; + void setOnlyAccessesInaccessibleMemory(); /// Determine if the function may only access memory that is /// either inaccessible from the IR or pointed to by its arguments. - bool onlyAccessesInaccessibleMemOrArgMem() const { - return hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly); - } - void setOnlyAccessesInaccessibleMemOrArgMem() { - addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } + bool onlyAccessesInaccessibleMemOrArgMem() const; + void setOnlyAccessesInaccessibleMemOrArgMem(); /// Determine if the function cannot return. bool doesNotReturn() const { diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 60390b18632ea..da081d0c40711 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1847,47 +1847,37 @@ class CallBase : public Instruction { /// Return true if the call should not be inlined. bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { addFnAttr(Attribute::NoInline); } + + MemoryEffects getMemoryEffects() const; + void setMemoryEffects(MemoryEffects ME); + /// Determine if the call does not access memory. 
- bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); } - void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); } + bool doesNotAccessMemory() const; + void setDoesNotAccessMemory(); /// Determine if the call does not access or only reads memory. - bool onlyReadsMemory() const { - return hasImpliedFnAttr(Attribute::ReadOnly); - } - - void setOnlyReadsMemory() { addFnAttr(Attribute::ReadOnly); } + bool onlyReadsMemory() const; + void setOnlyReadsMemory(); /// Determine if the call does not access or only writes memory. - bool onlyWritesMemory() const { - return hasImpliedFnAttr(Attribute::WriteOnly); - } - void setOnlyWritesMemory() { addFnAttr(Attribute::WriteOnly); } + bool onlyWritesMemory() const; + void setOnlyWritesMemory(); /// Determine if the call can access memmory only using pointers based /// on its arguments. - bool onlyAccessesArgMemory() const { - return hasFnAttr(Attribute::ArgMemOnly); - } - void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } + bool onlyAccessesArgMemory() const; + void setOnlyAccessesArgMemory(); /// Determine if the function may only access memory that is /// inaccessible from the IR. - bool onlyAccessesInaccessibleMemory() const { - return hasFnAttr(Attribute::InaccessibleMemOnly); - } - void setOnlyAccessesInaccessibleMemory() { - addFnAttr(Attribute::InaccessibleMemOnly); - } + bool onlyAccessesInaccessibleMemory() const; + void setOnlyAccessesInaccessibleMemory(); /// Determine if the function may only access memory that is /// either inaccessible from the IR or pointed to by its arguments. - bool onlyAccessesInaccessibleMemOrArgMem() const { - return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } - void setOnlyAccessesInaccessibleMemOrArgMem() { - addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } + bool onlyAccessesInaccessibleMemOrArgMem() const; + void setOnlyAccessesInaccessibleMemOrArgMem(); + /// Determine if the call cannot return. 
bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn() { addFnAttr(Attribute::NoReturn); } @@ -2107,43 +2097,6 @@ class CallBase : public Instruction { return false; } - /// Is the function attribute S disallowed by some operand bundle on - /// this operand bundle user? - bool isFnAttrDisallowedByOpBundle(StringRef S) const { - // Operand bundles only possibly disallow memory access attributes. All - // String attributes are fine. - return false; - } - - /// Is the function attribute A disallowed by some operand bundle on - /// this operand bundle user? - bool isFnAttrDisallowedByOpBundle(Attribute::AttrKind A) const { - switch (A) { - default: - return false; - - case Attribute::InaccessibleMemOrArgMemOnly: - return hasReadingOperandBundles(); - - case Attribute::InaccessibleMemOnly: - return hasReadingOperandBundles(); - - case Attribute::ArgMemOnly: - return hasReadingOperandBundles(); - - case Attribute::ReadNone: - return hasReadingOperandBundles(); - - case Attribute::ReadOnly: - return hasClobberingOperandBundles(); - - case Attribute::WriteOnly: - return hasReadingOperandBundles(); - } - - llvm_unreachable("switch has a default case!"); - } - /// Used to keep track of an operand bundle. See the main comment on /// OperandBundleUser above. struct BundleOpInfo { @@ -2303,35 +2256,10 @@ class CallBase : public Instruction { if (Attrs.hasFnAttr(Kind)) return true; - // Operand bundles override attributes on the called function, but don't - // override attributes directly present on the call instruction. - if (isFnAttrDisallowedByOpBundle(Kind)) - return false; - return hasFnAttrOnCalledFunction(Kind); } template Attribute getFnAttrOnCalledFunction(AK Kind) const; - /// A specialized version of hasFnAttrImpl for when the caller wants to - /// know if an attribute's semantics are implied, not whether the attribute - /// is actually present. 
This distinction only exists when checking whether - /// something is readonly or writeonly since readnone implies both. The case - /// which motivates the specialized code is a callee with readnone, and an - /// operand bundle on the call which disallows readnone but not either - /// readonly or writeonly. - bool hasImpliedFnAttr(Attribute::AttrKind Kind) const { - assert((Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly) && - "use hasFnAttrImpl instead"); - if (Attrs.hasFnAttr(Kind) || Attrs.hasFnAttr(Attribute::ReadNone)) - return true; - - if (isFnAttrDisallowedByOpBundle(Kind)) - return false; - - return hasFnAttrOnCalledFunction(Kind) || - hasFnAttrOnCalledFunction(Attribute::ReadNone); - } - /// Determine whether the return value has the given attribute. Supports /// Attribute::AttrKind and StringRef as \p AttrKind types. template bool hasRetAttrImpl(AttrKind Kind) const { diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 2bbc2486fec00..30dab43101dea 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -752,30 +752,10 @@ static bool isIntrinsicCall(const CallBase *Call, Intrinsic::ID IID) { return II && II->getIntrinsicID() == IID; } -static MemoryEffects getMemoryEffectsFromAttrs(AttributeSet Attrs) { - if (Attrs.hasAttribute(Attribute::ReadNone)) - return MemoryEffects::none(); - - ModRefInfo MR = ModRefInfo::ModRef; - if (Attrs.hasAttribute(Attribute::ReadOnly)) - MR = ModRefInfo::Ref; - else if (Attrs.hasAttribute(Attribute::WriteOnly)) - MR = ModRefInfo::Mod; - - if (Attrs.hasAttribute(Attribute::ArgMemOnly)) - return MemoryEffects::argMemOnly(MR); - if (Attrs.hasAttribute(Attribute::InaccessibleMemOnly)) - return MemoryEffects::inaccessibleMemOnly(MR); - if (Attrs.hasAttribute(Attribute::InaccessibleMemOrArgMemOnly)) - return MemoryEffects::inaccessibleOrArgMemOnly(MR); - return MemoryEffects(MR); -} - /// Returns the behavior when calling 
the given call site. MemoryEffects BasicAAResult::getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI) { - MemoryEffects Min = - getMemoryEffectsFromAttrs(Call->getAttributes().getFnAttrs()); + MemoryEffects Min = Call->getAttributes().getMemoryEffects(); if (const Function *F = dyn_cast(Call->getCalledOperand())) { MemoryEffects FuncME = AAQI.AAR.getMemoryEffects(F); @@ -803,7 +783,7 @@ MemoryEffects BasicAAResult::getMemoryEffects(const Function *F) { MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef); } - return getMemoryEffectsFromAttrs(F->getAttributes().getFnAttrs()); + return F->getMemoryEffects(); } ModRefInfo BasicAAResult::getArgModRefInfo(const CallBase *Call, diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 496f1ed435663..2a171df168fca 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -649,6 +649,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(readwrite); KEYWORD(argmem); KEYWORD(inaccessiblemem); + KEYWORD(argmemonly); + KEYWORD(inaccessiblememonly); + KEYWORD(inaccessiblemem_or_argmemonly); KEYWORD(type); KEYWORD(opaque); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 0fda0559b5b41..8767da472ed60 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1472,6 +1472,31 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B, } } +static bool upgradeMemoryAttr(MemoryEffects &ME, lltok::Kind Kind) { + switch (Kind) { + case lltok::kw_readnone: + ME &= MemoryEffects::none(); + return true; + case lltok::kw_readonly: + ME &= MemoryEffects::readOnly(); + return true; + case lltok::kw_writeonly: + ME &= MemoryEffects::writeOnly(); + return true; + case lltok::kw_argmemonly: + ME &= MemoryEffects::argMemOnly(); + return true; + case lltok::kw_inaccessiblememonly: + ME &= MemoryEffects::inaccessibleMemOnly(); + return true; + case lltok::kw_inaccessiblemem_or_argmemonly: + ME &= 
MemoryEffects::inaccessibleOrArgMemOnly(); + return true; + default: + return false; + } +} + /// parseFnAttributeValuePairs /// ::= | '=' bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, @@ -1481,10 +1506,11 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, B.clear(); + MemoryEffects ME = MemoryEffects::unknown(); while (true) { lltok::Kind Token = Lex.getKind(); if (Token == lltok::rbrace) - return HaveError; // Finished. + break; // Finished. if (Token == lltok::StringConstant) { if (parseStringAttribute(B)) @@ -1512,10 +1538,15 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, if (Token == lltok::kw_builtin) BuiltinLoc = Loc; + if (upgradeMemoryAttr(ME, Token)) { + Lex.Lex(); + continue; + } + Attribute::AttrKind Attr = tokenToAttribute(Token); if (Attr == Attribute::None) { if (!InAttrGrp) - return HaveError; + break; return error(Lex.getLoc(), "unterminated attribute group"); } @@ -1528,6 +1559,10 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, if (!Attribute::canUseAsFnAttr(Attr) && Attr != Attribute::Alignment) HaveError |= error(Loc, "this attribute does not apply to functions"); } + + if (ME != MemoryEffects::unknown()) + B.addMemoryAttr(ME); + return HaveError; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 338674c086356..66b4edbacde72 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1718,8 +1718,8 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { case Attribute::Convergent: return 1ULL << 46; case Attribute::SafeStack: return 1ULL << 47; case Attribute::NoRecurse: return 1ULL << 48; - case Attribute::InaccessibleMemOnly: return 1ULL << 49; - case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50; + // 1ULL << 49 is InaccessibleMemOnly, which is upgraded separately. 
+ // 1ULL << 50 is InaccessibleMemOrArgMemOnly, which is upgraded separately. case Attribute::SwiftSelf: return 1ULL << 51; case Attribute::SwiftError: return 1ULL << 52; case Attribute::WriteOnly: return 1ULL << 53; @@ -1767,7 +1767,8 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) { /// been decoded from the given integer. This function must stay in sync with /// 'encodeLLVMAttributesForBitcode'. static void decodeLLVMAttributesForBitcode(AttrBuilder &B, - uint64_t EncodedAttrs) { + uint64_t EncodedAttrs, + uint64_t AttrIdx) { // The alignment is stored as a 16-bit raw value from bits 31--16. We shift // the bits above 31 down by 11 bits. unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; @@ -1776,8 +1777,43 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B, if (Alignment) B.addAlignmentAttr(Alignment); - addRawAttributeValue(B, ((EncodedAttrs & (0xfffffULL << 32)) >> 11) | - (EncodedAttrs & 0xffff)); + + uint64_t Attrs = ((EncodedAttrs & (0xfffffULL << 32)) >> 11) | + (EncodedAttrs & 0xffff); + + if (AttrIdx == AttributeList::FunctionIndex) { + // Upgrade old memory attributes. 
+ MemoryEffects ME = MemoryEffects::unknown(); + if (Attrs & (1ULL << 9)) { + // ReadNone + Attrs &= ~(1ULL << 9); + ME &= MemoryEffects::none(); + } + if (Attrs & (1ULL << 10)) { + // ReadOnly + Attrs &= ~(1ULL << 10); + ME &= MemoryEffects::readOnly(); + } + if (Attrs & (1ULL << 49)) { + // InaccessibleMemOnly + Attrs &= ~(1ULL << 49); + ME &= MemoryEffects::inaccessibleMemOnly(); + } + if (Attrs & (1ULL << 50)) { + // InaccessibleMemOrArgMemOnly + Attrs &= ~(1ULL << 50); + ME &= MemoryEffects::inaccessibleOrArgMemOnly(); + } + if (Attrs & (1ULL << 53)) { + // WriteOnly + Attrs &= ~(1ULL << 53); + ME &= MemoryEffects::writeOnly(); + } + if (ME != MemoryEffects::unknown()) + B.addMemoryAttr(ME); + } + + addRawAttributeValue(B, Attrs); } Error BitcodeReader::parseAttributeBlock() { @@ -1824,7 +1860,7 @@ Error BitcodeReader::parseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B(Context); - decodeLLVMAttributesForBitcode(B, Record[i+1]); + decodeLLVMAttributesForBitcode(B, Record[i+1], Record[i]); Attrs.push_back(AttributeList::get(Context, Record[i], B)); } @@ -1851,8 +1887,6 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; - case bitc::ATTR_KIND_ARGMEMONLY: - return Attribute::ArgMemOnly; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: @@ -1869,10 +1903,6 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::ElementType; case bitc::ATTR_KIND_FNRETTHUNK_EXTERN: return Attribute::FnRetThunkExtern; - case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: - return Attribute::InaccessibleMemOnly; - case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: - return Attribute::InaccessibleMemOrArgMemOnly; case bitc::ATTR_KIND_INLINE_HINT: return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: @@ -2039,6 +2069,31 @@ Error BitcodeReader::parseAttrKind(uint64_t Code, 
Attribute::AttrKind *Kind) { return Error::success(); } +static bool upgradeOldMemoryAttribute(MemoryEffects &ME, uint64_t EncodedKind) { + switch (EncodedKind) { + case bitc::ATTR_KIND_READ_NONE: + ME &= MemoryEffects::none(); + return true; + case bitc::ATTR_KIND_READ_ONLY: + ME &= MemoryEffects::readOnly(); + return true; + case bitc::ATTR_KIND_WRITEONLY: + ME &= MemoryEffects::writeOnly(); + return true; + case bitc::ATTR_KIND_ARGMEMONLY: + ME &= MemoryEffects::argMemOnly(); + return true; + case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: + ME &= MemoryEffects::inaccessibleMemOnly(); + return true; + case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: + ME &= MemoryEffects::inaccessibleOrArgMemOnly(); + return true; + default: + return false; + } +} + Error BitcodeReader::parseAttributeGroupBlock() { if (Error Err = Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) return Err; @@ -2082,10 +2137,16 @@ Error BitcodeReader::parseAttributeGroupBlock() { uint64_t Idx = Record[1]; // Index of the object this attribute refers to. 
AttrBuilder B(Context); + MemoryEffects ME = MemoryEffects::unknown(); for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Record[i] == 0) { // Enum attribute Attribute::AttrKind Kind; - if (Error Err = parseAttrKind(Record[++i], &Kind)) + uint64_t EncodedKind = Record[++i]; + if (Idx == AttributeList::FunctionIndex && + upgradeOldMemoryAttribute(ME, EncodedKind)) + continue; + + if (Error Err = parseAttrKind(EncodedKind, &Kind)) return Err; // Upgrade old-style byval attribute to one with a type, even if it's @@ -2159,6 +2220,9 @@ Error BitcodeReader::parseAttributeGroupBlock() { } } + if (ME != MemoryEffects::unknown()) + B.addMemoryAttr(ME); + UpgradeAttributes(B); MAttributeGroups[GrpID] = AttributeList::get(Context, Idx, B); break; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1ac4413f158eb..85ada2c6b52ef 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -620,8 +620,6 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; - case Attribute::ArgMemOnly: - return bitc::ATTR_KIND_ARGMEMONLY; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: @@ -640,10 +638,6 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_HOT; case Attribute::ElementType: return bitc::ATTR_KIND_ELEMENTTYPE; - case Attribute::InaccessibleMemOnly: - return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; - case Attribute::InaccessibleMemOrArgMemOnly: - return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY; case Attribute::InlineHint: return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 6ef36d86891a1..74e4ad0562b4d 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ 
-61,6 +61,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/ModRef.h" #include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmInfo.h" @@ -1474,10 +1475,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC; unsigned IntrID = IntrIDOp.getIntrinsicID(); if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) { - AttributeList Attrs - = Intrinsic::getAttributes(MF->getFunction().getContext(), - static_cast(IntrID)); - bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone); + AttributeList Attrs = Intrinsic::getAttributes( + MF->getFunction().getContext(), static_cast(IntrID)); + bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory(); if (NoSideEffects && DeclHasSideEffects) { report("G_INTRINSIC used with intrinsic that accesses memory", MI); break; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 66f7f84b2267c..ab6624ef5f3d7 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/SymbolTableListTraits.h" @@ -727,6 +728,65 @@ void Function::copyAttributesFrom(const Function *Src) { setPrologueData(Src->getPrologueData()); } +MemoryEffects Function::getMemoryEffects() const { + return getAttributes().getMemoryEffects(); +} +void Function::setMemoryEffects(MemoryEffects ME) { + addFnAttr(Attribute::getWithMemoryEffects(getContext(), ME)); +} + +/// Determine if the function does not access memory. 
+bool Function::doesNotAccessMemory() const { + return getMemoryEffects().doesNotAccessMemory(); +} +void Function::setDoesNotAccessMemory() { + setMemoryEffects(MemoryEffects::none()); +} + +/// Determine if the function does not access or only reads memory. +bool Function::onlyReadsMemory() const { + return getMemoryEffects().onlyReadsMemory(); +} +void Function::setOnlyReadsMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::readOnly()); +} + +/// Determine if the function does not access or only writes memory. +bool Function::onlyWritesMemory() const { + return getMemoryEffects().onlyWritesMemory(); +} +void Function::setOnlyWritesMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::writeOnly()); +} + +/// Determine if the call can access memory only using pointers based +/// on its arguments. +bool Function::onlyAccessesArgMemory() const { + return getMemoryEffects().onlyAccessesArgPointees(); +} +void Function::setOnlyAccessesArgMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::argMemOnly()); +} + +/// Determine if the function may only access memory that is +/// inaccessible from the IR. +bool Function::onlyAccessesInaccessibleMemory() const { + return getMemoryEffects().onlyAccessesInaccessibleMem(); +} +void Function::setOnlyAccessesInaccessibleMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::inaccessibleMemOnly()); +} + +/// Determine if the function may only access memory that is +/// either inaccessible from the IR or pointed to by its arguments. +bool Function::onlyAccessesInaccessibleMemOrArgMem() const { + return getMemoryEffects().onlyAccessesInaccessibleOrArgMem(); +} +void Function::setOnlyAccessesInaccessibleMemOrArgMem() { + setMemoryEffects(getMemoryEffects() & + MemoryEffects::inaccessibleOrArgMemOnly()); +} + /// Table of string intrinsic names indexed by enum value. 
static const char * const IntrinsicNameTable[] = { "not_intrinsic", diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 8682a938d78c4..05e225dd64cfa 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -391,10 +392,12 @@ bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const { template Attribute CallBase::getFnAttrOnCalledFunction(AK Kind) const { - // Operand bundles override attributes on the called function, but don't - // override attributes directly present on the call instruction. - if (isFnAttrDisallowedByOpBundle(Kind)) - return Attribute(); + if constexpr (std::is_same_v) { + // getMemoryEffects() correctly combines memory effects from the call-site, + // operand bundles and function. + assert(Kind != Attribute::Memory && "Use getMemoryEffects() instead"); + } + Value *V = getCalledOperand(); if (auto *CE = dyn_cast(V)) if (CE->getOpcode() == BitCast) @@ -534,6 +537,77 @@ bool CallBase::hasClobberingOperandBundles() const { getIntrinsicID() != Intrinsic::assume; } +MemoryEffects CallBase::getMemoryEffects() const { + MemoryEffects ME = getAttributes().getMemoryEffects(); + if (auto *Fn = dyn_cast(getCalledOperand())) { + MemoryEffects FnME = Fn->getMemoryEffects(); + if (hasOperandBundles()) { + // TODO: Add a method to get memory effects for operand bundles instead. + if (hasReadingOperandBundles()) + FnME |= MemoryEffects::readOnly(); + if (hasClobberingOperandBundles()) + FnME |= MemoryEffects::writeOnly(); + } + ME &= FnME; + } + return ME; +} +void CallBase::setMemoryEffects(MemoryEffects ME) { + addFnAttr(Attribute::getWithMemoryEffects(getContext(), ME)); +} + +/// Determine if the function does not access memory. 
+bool CallBase::doesNotAccessMemory() const { + return getMemoryEffects().doesNotAccessMemory(); +} +void CallBase::setDoesNotAccessMemory() { + setMemoryEffects(MemoryEffects::none()); +} + +/// Determine if the function does not access or only reads memory. +bool CallBase::onlyReadsMemory() const { + return getMemoryEffects().onlyReadsMemory(); +} +void CallBase::setOnlyReadsMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::readOnly()); +} + +/// Determine if the function does not access or only writes memory. +bool CallBase::onlyWritesMemory() const { + return getMemoryEffects().onlyWritesMemory(); +} +void CallBase::setOnlyWritesMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::writeOnly()); +} + +/// Determine if the call can access memmory only using pointers based +/// on its arguments. +bool CallBase::onlyAccessesArgMemory() const { + return getMemoryEffects().onlyAccessesArgPointees(); +} +void CallBase::setOnlyAccessesArgMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::argMemOnly()); +} + +/// Determine if the function may only access memory that is +/// inaccessible from the IR. +bool CallBase::onlyAccessesInaccessibleMemory() const { + return getMemoryEffects().onlyAccessesInaccessibleMem(); +} +void CallBase::setOnlyAccessesInaccessibleMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::inaccessibleMemOnly()); +} + +/// Determine if the function may only access memory that is +/// either inaccessible from the IR or pointed to by its arguments. 
+bool CallBase::onlyAccessesInaccessibleMemOrArgMem() const { + return getMemoryEffects().onlyAccessesInaccessibleOrArgMem(); +} +void CallBase::setOnlyAccessesInaccessibleMemOrArgMem() { + setMemoryEffects(getMemoryEffects() & + MemoryEffects::inaccessibleOrArgMemOnly()); +} + //===----------------------------------------------------------------------===// // CallInst Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index e7c2eb9e8818b..3c68f07cb230d 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2021,28 +2021,6 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, "' does not apply to functions!", V); - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::ReadOnly)), - "Attributes 'readnone and readonly' are incompatible!", V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::WriteOnly)), - "Attributes 'readnone and writeonly' are incompatible!", V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadOnly) && - Attrs.hasFnAttr(Attribute::WriteOnly)), - "Attributes 'readonly and writeonly' are incompatible!", V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)), - "Attributes 'readnone and inaccessiblemem_or_argmemonly' are " - "incompatible!", - V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)), - "Attributes 'readnone and inaccessiblememonly' are incompatible!", V); - Check(!(Attrs.hasFnAttr(Attribute::NoInline) && Attrs.hasFnAttr(Attribute::AlwaysInline)), "Attributes 'noinline and alwaysinline' are incompatible!", V); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp index 1b0d7bb43c80f..9dbcb548f8476 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/CommandLine.h" @@ -992,7 +993,8 @@ FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M, } else { AttributeList Attr; LLVMContext &Ctx = M->getContext(); - Attr = Attr.addFnAttribute(Ctx, Attribute::ReadOnly); + Attr = Attr.addFnAttribute( + Ctx, Attribute::getWithMemoryEffects(Ctx, MemoryEffects::readOnly())); Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind); C = M->getOrInsertFunction(FuncName, FuncTy, Attr); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index eb947a17ef9ec..2570e7a661e59 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" +#include "llvm/IR/ModRef.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" @@ -958,7 +959,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, AMDGPU::lookupRsrcIntrinsic(IntrID)) { AttributeList Attr = Intrinsic::getAttributes(CI.getContext(), (Intrinsic::ID)IntrID); - if (Attr.hasFnAttr(Attribute::ReadNone)) + MemoryEffects ME = Attr.getMemoryEffects(); + if (ME.doesNotAccessMemory()) return false; SIMachineFunctionInfo *MFI = MF.getInfo(); @@ -974,7 +976,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } Info.flags |= MachineMemOperand::MODereferenceable; - if (Attr.hasFnAttr(Attribute::ReadOnly)) { + if (ME.onlyReadsMemory()) { unsigned DMaskLanes = 4; if (RsrcIntr->IsImage) { @@ -998,7 +1000,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // FIXME: What does alignment mean for an image? 
Info.opc = ISD::INTRINSIC_W_CHAIN; Info.flags |= MachineMemOperand::MOLoad; - } else if (Attr.hasFnAttr(Attribute::WriteOnly)) { + } else if (ME.onlyWritesMemory()) { Info.opc = ISD::INTRINSIC_VOID; Type *DataTy = CI.getArgOperand(0)->getType(); diff --git a/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/llvm/lib/Target/Mips/Mips16HardFloat.cpp index 419f0ac1a8a72..8b928e36b9da5 100644 --- a/llvm/lib/Target/Mips/Mips16HardFloat.cpp +++ b/llvm/lib/Target/Mips/Mips16HardFloat.cpp @@ -12,6 +12,7 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" @@ -409,7 +410,8 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, // functions will take place. // A = A.addFnAttribute(C, "__Mips16RetHelper"); - A = A.addFnAttribute(C, Attribute::ReadNone); + A = A.addFnAttribute( + C, Attribute::getWithMemoryEffects(C, MemoryEffects::none())); A = A.addFnAttribute(C, Attribute::NoInline); FunctionCallee F = (M->getOrInsertFunction(Name, A, MyVoid, T)); CallInst::Create(F, Params, "", &I); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index c1850f7378127..0514e503fc6b4 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -7305,13 +7305,28 @@ struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { + // TODO: It would be better to merge this with AAMemoryLocation, so that + // we could determine read/write per location. This would also have the + // benefit of only one place trying to manifest the memory attribute. 
Function &F = cast(getAnchorValue()); - if (isAssumedReadNone()) { - F.removeFnAttr(Attribute::ArgMemOnly); - F.removeFnAttr(Attribute::InaccessibleMemOnly); - F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } - return AAMemoryBehaviorImpl::manifest(A); + MemoryEffects ME = MemoryEffects::unknown(); + if (isAssumedReadNone()) + ME = MemoryEffects::none(); + else if (isAssumedReadOnly()) + ME = MemoryEffects::readOnly(); + else if (isAssumedWriteOnly()) + ME = MemoryEffects::writeOnly(); + + // Intersect with existing memory attribute, as we currently deduce the + // location and modref portion separately. + MemoryEffects ExistingME = F.getMemoryEffects(); + ME &= ExistingME; + if (ME == ExistingME) + return ChangeStatus::UNCHANGED; + + return IRAttributeManifest::manifestAttrs( + A, getIRPosition(), Attribute::getWithMemoryEffects(F.getContext(), ME), + /*ForceReplace*/ true); } /// See AbstractAttribute::trackStatistics() @@ -7351,6 +7366,31 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { return clampStateAndIndicateChange(getState(), FnAA.getState()); } + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // TODO: Deduplicate this with AAMemoryBehaviorFunction. + CallBase &CB = cast(getAnchorValue()); + MemoryEffects ME = MemoryEffects::unknown(); + if (isAssumedReadNone()) + ME = MemoryEffects::none(); + else if (isAssumedReadOnly()) + ME = MemoryEffects::readOnly(); + else if (isAssumedWriteOnly()) + ME = MemoryEffects::writeOnly(); + + // Intersect with existing memory attribute, as we currently deduce the + // location and modref portion separately. 
+ MemoryEffects ExistingME = CB.getMemoryEffects(); + ME &= ExistingME; + if (ME == ExistingME) + return ChangeStatus::UNCHANGED; + + return IRAttributeManifest::manifestAttrs( + A, getIRPosition(), + Attribute::getWithMemoryEffects(CB.getContext(), ME), + /*ForceReplace*/ true); + } + /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { if (isAssumedReadNone()) @@ -7620,36 +7660,54 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { // unlikely this will cause real performance problems. If we are deriving // attributes for the anchor function we even remove the attribute in // addition to ignoring it. + // TODO: A better way to handle this would be to add ~NO_GLOBAL_MEM / + // MemoryEffects::Other as a possible location. bool UseArgMemOnly = true; Function *AnchorFn = IRP.getAnchorScope(); if (AnchorFn && A.isRunOn(*AnchorFn)) UseArgMemOnly = !AnchorFn->hasLocalLinkage(); SmallVector Attrs; - IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions); + IRP.getAttrs({Attribute::Memory}, Attrs, IgnoreSubsumingPositions); for (const Attribute &Attr : Attrs) { - switch (Attr.getKindAsEnum()) { - case Attribute::ReadNone: + // TODO: We can map MemoryEffects to Attributor locations more precisely. + MemoryEffects ME = Attr.getMemoryEffects(); + if (ME.doesNotAccessMemory()) { State.addKnownBits(NO_LOCAL_MEM | NO_CONST_MEM); - break; - case Attribute::InaccessibleMemOnly: + continue; + } + if (ME.onlyAccessesInaccessibleMem()) { State.addKnownBits(inverseLocation(NO_INACCESSIBLE_MEM, true, true)); - break; - case Attribute::ArgMemOnly: + continue; + } + if (ME.onlyAccessesArgPointees()) { if (UseArgMemOnly) State.addKnownBits(inverseLocation(NO_ARGUMENT_MEM, true, true)); - else - IRP.removeAttrs({Attribute::ArgMemOnly}); - break; - case Attribute::InaccessibleMemOrArgMemOnly: + else { + // Remove location information, only keep read/write info. 
+ ME = MemoryEffects(ME.getModRef()); + IRAttributeManifest::manifestAttrs( + A, IRP, + Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(), + ME), + /*ForceReplace*/ true); + } + continue; + } + if (ME.onlyAccessesInaccessibleOrArgMem()) { if (UseArgMemOnly) State.addKnownBits(inverseLocation( NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); - else - IRP.removeAttrs({Attribute::InaccessibleMemOrArgMemOnly}); - break; - default: - llvm_unreachable("Unexpected attribute!"); + else { + // Remove location information, only keep read/write info. + ME = MemoryEffects(ME.getModRef()); + IRAttributeManifest::manifestAttrs( + A, IRP, + Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(), + ME), + /*ForceReplace*/ true); + } + continue; } } } @@ -7657,41 +7715,53 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { /// See AbstractAttribute::getDeducedAttributes(...). void getDeducedAttributes(LLVMContext &Ctx, SmallVectorImpl &Attrs) const override { + // TODO: We can map Attributor locations to MemoryEffects more precisely. 
assert(Attrs.size() == 0); - if (isAssumedReadNone()) { - Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); - } else if (getIRPosition().getPositionKind() == IRPosition::IRP_FUNCTION) { - if (isAssumedInaccessibleMemOnly()) - Attrs.push_back(Attribute::get(Ctx, Attribute::InaccessibleMemOnly)); + if (getIRPosition().getPositionKind() == IRPosition::IRP_FUNCTION) { + if (isAssumedReadNone()) + Attrs.push_back( + Attribute::getWithMemoryEffects(Ctx, MemoryEffects::none())); + else if (isAssumedInaccessibleMemOnly()) + Attrs.push_back(Attribute::getWithMemoryEffects( + Ctx, MemoryEffects::inaccessibleMemOnly())); else if (isAssumedArgMemOnly()) - Attrs.push_back(Attribute::get(Ctx, Attribute::ArgMemOnly)); - else if (isAssumedInaccessibleOrArgMemOnly()) Attrs.push_back( - Attribute::get(Ctx, Attribute::InaccessibleMemOrArgMemOnly)); + Attribute::getWithMemoryEffects(Ctx, MemoryEffects::argMemOnly())); + else if (isAssumedInaccessibleOrArgMemOnly()) + Attrs.push_back(Attribute::getWithMemoryEffects( + Ctx, MemoryEffects::inaccessibleOrArgMemOnly())); } assert(Attrs.size() <= 1); } /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { + // TODO: If AAMemoryLocation and AAMemoryBehavior are merged, we could + // provide per-location modref information here. const IRPosition &IRP = getIRPosition(); - // Check if we would improve the existing attributes first. - SmallVector DeducedAttrs; + SmallVector DeducedAttrs; getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); - if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { - return IRP.hasAttr(Attr.getKindAsEnum(), - /* IgnoreSubsumingPositions */ true); - })) + if (DeducedAttrs.size() != 1) return ChangeStatus::UNCHANGED; + MemoryEffects ME = DeducedAttrs[0].getMemoryEffects(); + + // Intersect with existing memory attribute, as we currently deduce the + // location and modref portion separately. 
+ SmallVector ExistingAttrs; + IRP.getAttrs({Attribute::Memory}, ExistingAttrs, + /* IgnoreSubsumingPositions */ true); + if (ExistingAttrs.size() == 1) { + MemoryEffects ExistingME = ExistingAttrs[0].getMemoryEffects(); + ME &= ExistingME; + if (ME == ExistingME) + return ChangeStatus::UNCHANGED; + } - // Clear existing attributes. - IRP.removeAttrs(AttrKinds); - if (isAssumedReadNone()) - IRP.removeAttrs(AAMemoryBehaviorImpl::AttrKinds); - - // Use the generic manifest method. - return IRAttribute::manifest(A); + return IRAttributeManifest::manifestAttrs( + A, IRP, + Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(), ME), + /*ForceReplace*/ true); } /// See AAMemoryLocation::checkForAllAccessesToMemoryKind(...). @@ -7814,15 +7884,8 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { /// Used to allocate access sets. BumpPtrAllocator &Allocator; - - /// The set of IR attributes AAMemoryLocation deals with. - static const Attribute::AttrKind AttrKinds[4]; }; -const Attribute::AttrKind AAMemoryLocationImpl::AttrKinds[] = { - Attribute::ReadNone, Attribute::InaccessibleMemOnly, Attribute::ArgMemOnly, - Attribute::InaccessibleMemOrArgMemOnly}; - void AAMemoryLocationImpl::categorizePtrValue( Attributor &A, const Instruction &I, const Value &Ptr, AAMemoryLocation::StateType &State, bool &Changed) { diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index a187cb1e4790e..3058dc25202ec 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -69,10 +69,7 @@ using namespace llvm; #define DEBUG_TYPE "function-attrs" -STATISTIC(NumArgMemOnly, "Number of functions marked argmemonly"); -STATISTIC(NumReadNone, "Number of functions marked readnone"); -STATISTIC(NumReadOnly, "Number of functions marked readonly"); -STATISTIC(NumWriteOnly, "Number of functions marked writeonly"); +STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute"); 
STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumReturned, "Number of arguments marked returned"); STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); @@ -254,79 +251,14 @@ static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, return; } - ModRefInfo MR = ME.getModRef(); - for (Function *F : SCCNodes) { - if (F->doesNotAccessMemory()) - // Already perfect! - continue; - - if (ME.doesNotAccessMemory()) { - // For readnone, remove all other memory attributes. - AttributeMask AttrsToRemove; - AttrsToRemove.addAttribute(Attribute::ReadOnly); - AttrsToRemove.addAttribute(Attribute::WriteOnly); - AttrsToRemove.addAttribute(Attribute::ArgMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - - ++NumReadNone; - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::ReadNone); - Changed.insert(F); - continue; - } - - // Add argmemonly, inaccessiblememonly, or inaccessible_or_argmemonly - // attributes if possible. 
- AttributeMask AttrsToRemove; - AttrsToRemove.addAttribute(Attribute::ArgMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - if (ME.onlyAccessesArgPointees()) { - if (!F->onlyAccessesArgMemory()) { - NumArgMemOnly++; - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::ArgMemOnly); - Changed.insert(F); - } - } else if (ME.onlyAccessesInaccessibleMem()) { - if (!F->onlyAccessesInaccessibleMemory()) { - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::InaccessibleMemOnly); - Changed.insert(F); - } - } else if (ME.onlyAccessesInaccessibleOrArgMem() && - !F->onlyAccessesInaccessibleMemOrArgMem()) { - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + MemoryEffects OldME = F->getMemoryEffects(); + MemoryEffects NewME = ME & OldME; + if (NewME != OldME) { + ++NumMemoryAttr; + F->setMemoryEffects(NewME); Changed.insert(F); } - - // The SCC contains functions both writing and reading from memory. We - // cannot add readonly or writeonline attributes. - if (MR == ModRefInfo::ModRef) - continue; - - if (F->onlyReadsMemory() && MR == ModRefInfo::Ref) - continue; - - if (F->onlyWritesMemory() && MR == ModRefInfo::Mod) - continue; - - Changed.insert(F); - - // Add in the new attribute. 
- if (MR == ModRefInfo::Mod) { - ++NumWriteOnly; - F->removeFnAttr(Attribute::ReadOnly); - F->addFnAttr(Attribute::WriteOnly); - } else { - ++NumReadOnly; - assert(MR == ModRefInfo::Ref); - F->removeFnAttr(Attribute::WriteOnly); - F->addFnAttr(Attribute::ReadOnly); - } } } diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index a84d2c4836d11..3c724c81c643b 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1234,19 +1234,22 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, // Initialize DataFlowSanitizer runtime functions and declare them in the module void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { + LLVMContext &C = M.getContext(); { AttributeList AL; - AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind); - AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly); - AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt); + AL = AL.addFnAttribute(C, Attribute::NoUnwind); + AL = AL.addFnAttribute( + C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly())); + AL = AL.addRetAttribute(C, Attribute::ZExt); DFSanUnionLoadFn = Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL); } { AttributeList AL; - AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind); - AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly); - AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt); + AL = AL.addFnAttribute(C, Attribute::NoUnwind); + AL = AL.addFnAttribute( + C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly())); + AL = AL.addRetAttribute(C, Attribute::ZExt); DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction( "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL); } @@ -1470,8 +1473,8 @@ bool DataFlowSanitizer::runImpl( } } - ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) - 
.addAttribute(Attribute::ReadNone); + // TODO: This could be more precise. + ReadOnlyNoneAttrs.addAttribute(Attribute::Memory); // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 6490a8d543276..cb4d1b6a34e2c 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4074,12 +4074,9 @@ struct MemorySanitizerVisitor : public InstVisitor { // will become a non-readonly function after it is instrumented by us. To // prevent this code from being optimized out, mark that function // non-readonly in advance. + // TODO: We can likely do better than dropping memory() completely here. AttributeMask B; - B.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone) - .addAttribute(Attribute::WriteOnly) - .addAttribute(Attribute::ArgMemOnly) - .addAttribute(Attribute::Speculatable); + B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable); Call->removeFnAttrs(B); if (Function *Func = Call->getCalledFunction()) { @@ -5769,13 +5766,9 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { MemorySanitizerVisitor Visitor(F, *this, TLI); - // Clear out readonly/readnone attributes. + // Clear out memory attributes. 
AttributeMask B; - B.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone) - .addAttribute(Attribute::WriteOnly) - .addAttribute(Attribute::ArgMemOnly) - .addAttribute(Attribute::Speculatable); + B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable); F.removeFnAttrs(B); return Visitor.runOnFunction(); diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 689a2a286cb9f..6ad3bc9a69155 100644 --- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -80,10 +80,9 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, Instruction *LibCall = Call->clone(); Builder.Insert(LibCall); - // Add attribute "readnone" so that backend can use a native sqrt instruction - // for this call. - Call->removeFnAttr(Attribute::WriteOnly); - Call->addFnAttr(Attribute::ReadNone); + // Add memory(none) attribute, so that the backend can use a native sqrt + // instruction for this call. + Call->setDoesNotAccessMemory(); // Insert a FP compare instruction and use it as the CurrBB branch condition. Builder.SetInsertPoint(CurrBBTerm); diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 68cf9cc77cc48..12fd2e677909f 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1430,10 +1430,7 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // machine model for purposes of optimization. We have to strip these on // both function declarations and call sites. 
static constexpr Attribute::AttrKind FnAttrsToStrip[] = - {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly, - Attribute::ArgMemOnly, Attribute::InaccessibleMemOnly, - Attribute::InaccessibleMemOrArgMemOnly, - Attribute::NoSync, Attribute::NoFree}; + {Attribute::Memory, Attribute::NoSync, Attribute::NoFree}; // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 8f90c2b8ba1a7..fe1c632c85caa 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -590,21 +590,28 @@ bool llvm::runIPSCCP( } } - // If we replaced an argument, the argmemonly and - // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove - // them from both the function and callsites. + // If we replaced an argument, we may now also access a global (currently + // classified as "other" memory). Update memory attribute to reflect this. 
if (ReplacedPointerArg) { - AttributeMask AttributesToRemove; - AttributesToRemove.addAttribute(Attribute::ArgMemOnly); - AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - F.removeFnAttrs(AttributesToRemove); - + auto UpdateAttrs = [&](AttributeList AL) { + MemoryEffects ME = AL.getMemoryEffects(); + if (ME == MemoryEffects::unknown()) + return AL; + + ME |= MemoryEffects(MemoryEffects::Other, + ME.getModRef(MemoryEffects::ArgMem)); + return AL.addFnAttribute( + F.getContext(), + Attribute::getWithMemoryEffects(F.getContext(), ME)); + }; + + F.setAttributes(UpdateAttrs(F.getAttributes())); for (User *U : F.users()) { auto *CB = dyn_cast(U); if (!CB || CB->getCalledFunction() != &F) continue; - CB->removeFnAttrs(AttributesToRemove); + CB->setAttributes(UpdateAttrs(CB->getAttributes())); } } MadeChanges |= ReplacedPointerArg; diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index f1d1d0d4f1ef3..5fb4ee1cac524 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -75,11 +75,6 @@ static bool setOnlyReadsMemory(Function &F) { static bool setOnlyWritesMemory(Function &F) { if (F.onlyWritesMemory()) // writeonly or readnone return false; - // Turn readonly and writeonly into readnone. - if (F.hasFnAttribute(Attribute::ReadOnly)) { - F.removeFnAttr(Attribute::ReadOnly); - return setDoesNotAccessMemory(F); - } ++NumWriteOnly; F.setOnlyWritesMemory(); return true; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 23b943c69386d..fba9dbb2c9c27 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -904,24 +904,18 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Those attributes cannot be propagated safely. Explicitly list them // here so we get a warning if new attributes are added. 
case Attribute::AllocSize: - case Attribute::ArgMemOnly: case Attribute::Builtin: case Attribute::Convergent: - case Attribute::InaccessibleMemOnly: - case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::JumpTable: case Attribute::Naked: case Attribute::NoBuiltin: case Attribute::NoMerge: case Attribute::NoReturn: case Attribute::NoSync: - case Attribute::ReadNone: - case Attribute::ReadOnly: case Attribute::ReturnsTwice: case Attribute::Speculatable: case Attribute::StackAlignment: case Attribute::WillReturn: - case Attribute::WriteOnly: case Attribute::AllocKind: case Attribute::PresplitCoroutine: case Attribute::Memory: @@ -984,6 +978,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NoUndef: case Attribute::NonNull: case Attribute::Preallocated: + case Attribute::ReadNone: + case Attribute::ReadOnly: case Attribute::Returned: case Attribute::SExt: case Attribute::StructRet: @@ -993,6 +989,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::ZExt: case Attribute::ImmArg: case Attribute::ByRef: + case Attribute::WriteOnly: // These are not really attributes. 
case Attribute::None: case Attribute::EndAttrKinds: diff --git a/llvm/test/Analysis/BasicAA/cs-cs.ll b/llvm/test/Analysis/BasicAA/cs-cs.ll index 3a0cd5947e2a5..513f305b3a19e 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs.ll @@ -429,19 +429,19 @@ entry: } -; CHECK: attributes #0 = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CHECK-NEXT: attributes #1 = { argmemonly nocallback nofree nounwind willreturn } -; CHECK-NEXT: attributes #2 = { argmemonly nosync nounwind willreturn } -; CHECK-NEXT: attributes #3 = { noinline nounwind readonly } -; CHECK-NEXT: attributes #4 = { noinline nounwind writeonly } +; CHECK: attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CHECK-NEXT: attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK-NEXT: attributes #2 = { nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK-NEXT: attributes #3 = { noinline nounwind memory(read) } +; CHECK-NEXT: attributes #4 = { noinline nounwind memory(write) } ; CHECK-NEXT: attributes #5 = { nounwind ssp } -; CHECK-NEXT: attributes #6 = { inaccessiblememonly nounwind } -; CHECK-NEXT: attributes #7 = { inaccessiblemem_or_argmemonly nounwind } -; CHECK-NEXT: attributes #8 = { argmemonly nounwind } -; CHECK-NEXT: attributes #9 = { readonly } -; CHECK-NEXT: attributes #10 = { inaccessiblememonly } -; CHECK-NEXT: attributes #11 = { inaccessiblemem_or_argmemonly } -; CHECK-NEXT: attributes #12 = { argmemonly } +; CHECK-NEXT: attributes #6 = { nounwind memory(inaccessiblemem: readwrite) } +; CHECK-NEXT: attributes #7 = { nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK-NEXT: attributes #8 = { nounwind memory(argmem: readwrite) } +; CHECK-NEXT: attributes #9 = { memory(read) } +; CHECK-NEXT: attributes #10 = { memory(inaccessiblemem: readwrite) } +; CHECK-NEXT: attributes #11 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; 
CHECK-NEXT: attributes #12 = { memory(argmem: readwrite) } attributes #0 = { argmemonly nounwind } attributes #1 = { noinline nounwind readonly } diff --git a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll index d43445c7a50c4..3fda58721e663 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll @@ -26,6 +26,6 @@ entry: declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll index 46e9f4e06c2f2..3965286215e3d 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics.ll @@ -22,6 +22,6 @@ entry: declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/pure-const-dce.ll b/llvm/test/Analysis/BasicAA/pure-const-dce.ll index 
8d07084561732..c6b8b0669ab23 100644 --- a/llvm/test/Analysis/BasicAA/pure-const-dce.ll +++ b/llvm/test/Analysis/BasicAA/pure-const-dce.ll @@ -50,5 +50,5 @@ declare i32 @TestPure(i32) readonly declare i32 @TestNone(i32) -; CHECK: attributes [[READNONE]] = { readnone } -; CHECK: attributes [[READONLY]] = { readonly } +; CHECK: attributes [[READNONE]] = { memory(none) } +; CHECK: attributes [[READONLY]] = { memory(read) } diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index 6134f578895bc..7f8f7e8c6d662 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -72,14 +72,14 @@ define i32 @test3_no(i8* %p) nounwind { declare void @callee(i32* %p) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind -; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly } -; CHECK: attributes #2 = { nofree nosync nounwind readnone } +; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CHECK: attributes #2 = { nofree nosync nounwind memory(none) } ; CHECK: attributes #3 = { nounwind } -; CHECK: attributes #4 = { mustprogress nofree nosync nounwind readnone willreturn } -; CHECK: attributes #5 = { argmemonly mustprogress nofree nosync nounwind willreturn } -; CHECK: attributes #6 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #7 = { argmemonly nocallback nofree nounwind willreturn } +; CHECK: attributes #4 = { mustprogress nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #5 = { mustprogress nofree nosync nounwind willreturn 
memory(argmem: readwrite) } +; CHECK: attributes #6 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #7 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; Root note. !0 = !{ } diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll index 42166584a78a4..f3b4b1abd2168 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll @@ -22,8 +22,8 @@ entry: declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } ; CHECK: attributes [[NUW]] = { nounwind } !0 = !{!"tbaa root"} diff --git a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll index cbf5de3d934a8..40e7789b4e44c 100644 --- a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll +++ b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll @@ -23,4 +23,4 @@ declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) declare @llvm.aarch64.sve.dup.nxv4i32(, , i32) ; CHECK: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn } -; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn 
memory(none) } diff --git a/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll index afa90ec76a8f1..c3a6dec809301 100644 --- a/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll +++ b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll @@ -15,6 +15,6 @@ declare <16 x float> @llvm.masked.expandload.v16f32 (ptr, <16 x i1>, <16 x float ; CHECK: declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr nocapture, <8 x i1>) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY:#[0-9]+]] declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) -; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY]] = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } -; CHECK: attributes [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } +; CHECK: attributes [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(read) } diff --git a/llvm/test/Bindings/llvm-c/debug_info.ll b/llvm/test/Bindings/llvm-c/debug_info.ll index 874cf818dca29..a7fcd8a999ef1 100644 --- a/llvm/test/Bindings/llvm-c/debug_info.ll +++ b/llvm/test/Bindings/llvm-c/debug_info.ll @@ -12,13 +12,13 @@ ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata !41, metadata !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)), !dbg !44 ; 
CHECK-NEXT: } -; CHECK: ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +; CHECK: ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; CHECK-NEXT: declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 -; CHECK: ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +; CHECK: ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; CHECK-NEXT: declare void @llvm.dbg.value(metadata, metadata, metadata) #0 -; CHECK: attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: !llvm.dbg.cu = !{!0} ; CHECK-NEXT: !FooType = !{!28} diff --git a/llvm/test/Bitcode/attributes-3.3.ll b/llvm/test/Bitcode/attributes-3.3.ll index 6a645fbaed79d..f9aef5d2f612d 100644 --- a/llvm/test/Bitcode/attributes-3.3.ll +++ b/llvm/test/Bitcode/attributes-3.3.ll @@ -213,8 +213,8 @@ define void @f34() ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } -; CHECK: attributes #2 = { readnone } -; CHECK: attributes #3 = { readonly } +; CHECK: attributes #2 = { memory(none) } +; CHECK: attributes #3 = { memory(read) } ; CHECK: attributes #4 = { noinline } ; CHECK: attributes #5 = { alwaysinline } ; CHECK: attributes #6 = { optsize } diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index f8d31722acf07..7d42ae9b8a073 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -540,8 +540,8 @@ define void @f88() skipprofile { ret void } ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } -; CHECK: attributes #2 = { readnone } -; CHECK: attributes #3 = { readonly } +; CHECK: attributes #2 = { memory(none) } +; CHECK: attributes #3 = { memory(read) } ; CHECK: attributes #4 = { noinline } ; CHECK: attributes #5 = { 
alwaysinline } ; CHECK: attributes #6 = { optsize } @@ -564,13 +564,13 @@ define void @f88() skipprofile { ret void } ; CHECK: attributes #23 = { noinline optnone } ; CHECK: attributes #24 = { jumptable } ; CHECK: attributes #25 = { convergent } -; CHECK: attributes #26 = { argmemonly } +; CHECK: attributes #26 = { memory(argmem: readwrite) } ; CHECK: attributes #27 = { norecurse } -; CHECK: attributes #28 = { inaccessiblememonly } -; CHECK: attributes #29 = { inaccessiblemem_or_argmemonly } +; CHECK: attributes #28 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #29 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #30 = { allocsize(0) } ; CHECK: attributes #31 = { allocsize(0,1) } -; CHECK: attributes #32 = { writeonly } +; CHECK: attributes #32 = { memory(write) } ; CHECK: attributes #33 = { speculatable } ; CHECK: attributes #34 = { sanitize_hwaddress } ; CHECK: attributes #35 = { shadowcallstack } diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll index edc363fc50354..932f63d80e344 100644 --- a/llvm/test/Bitcode/compatibility-3.6.ll +++ b/llvm/test/Bitcode/compatibility-3.6.ll @@ -1168,8 +1168,8 @@ define void @intrinsics.codegen() { ; CHECK: attributes #15 = { nounwind } ; CHECK: attributes #16 = { noinline optnone } ; CHECK: attributes #17 = { optsize } -; CHECK: attributes #18 = { readnone } -; CHECK: attributes #19 = { readonly } +; CHECK: attributes #18 = { memory(none) } +; CHECK: attributes #19 = { memory(read) } ; CHECK: attributes #20 = { returns_twice } ; CHECK: attributes #21 = { sanitize_address } ; CHECK: attributes #22 = { sanitize_memory } @@ -1179,12 +1179,12 @@ define void @intrinsics.codegen() { ; CHECK: attributes #26 = { sspstrong } ; CHECK: attributes #27 = { uwtable } ; CHECK: attributes #28 = { "cpu"="cortex-a8" } -; CHECK: attributes #29 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #29 = { nocallback nofree nosync 
nounwind willreturn memory(none) } ; CHECK: attributes #30 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #31 = { argmemonly nounwind readonly } -; CHECK: attributes #32 = { argmemonly nounwind } -; CHECK: attributes #33 = { nounwind readonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #31 = { nounwind memory(argmem: read) } +; CHECK: attributes #32 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #33 = { nounwind memory(read) } +; CHECK: attributes #34 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll index 81b0551e7045f..f32ae0a4668d5 100644 --- a/llvm/test/Bitcode/compatibility-3.7.ll +++ b/llvm/test/Bitcode/compatibility-3.7.ll @@ -1229,8 +1229,8 @@ define void @misc.metadata() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1242,12 +1242,12 @@ define void @misc.metadata() { ; CHECK: attributes #29 = { "thunk" } ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } -; CHECK: attributes #32 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #34 = { argmemonly nounwind readonly } -; CHECK: attributes #35 = { argmemonly nounwind } -; CHECK: attributes 
#36 = { nounwind readonly } -; CHECK: attributes #37 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #34 = { nounwind memory(argmem: read) } +; CHECK: attributes #35 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #36 = { nounwind memory(read) } +; CHECK: attributes #37 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #38 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll index 3cab693081fe6..8e9a1a193e299 100644 --- a/llvm/test/Bitcode/compatibility-3.8.ll +++ b/llvm/test/Bitcode/compatibility-3.8.ll @@ -1536,8 +1536,8 @@ normal: ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1550,14 +1550,14 @@ normal: ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly 
} -; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #41 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll index 0411f5decca7f..ebb50e40d2fe7 100644 --- a/llvm/test/Bitcode/compatibility-3.9.ll +++ b/llvm/test/Bitcode/compatibility-3.9.ll @@ -1609,8 +1609,8 @@ declare void @f.writeonly() writeonly ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1623,15 +1623,15 @@ declare void @f.writeonly() writeonly ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: 
attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } +; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #42 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll index 4ce6a436c9c49..3c3f943e7e8a4 100644 --- a/llvm/test/Bitcode/compatibility-4.0.ll +++ b/llvm/test/Bitcode/compatibility-4.0.ll @@ -1634,8 +1634,8 @@ define i8** @constexpr() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1648,15 +1648,15 @@ define i8** @constexpr() { ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes 
#37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } +; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #42 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll index 4a6377dd718d8..cc7e6fe0ea897 100644 --- a/llvm/test/Bitcode/compatibility-5.0.ll +++ b/llvm/test/Bitcode/compatibility-5.0.ll @@ -1649,8 +1649,8 @@ define i8** @constexpr() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1663,16 +1663,16 @@ define i8** @constexpr() { ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } 
-; CHECK: attributes #40 = { writeonly } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } ; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll index 17ad8db81a0ab..125cfac5e9b54 100644 --- a/llvm/test/Bitcode/compatibility-6.0.ll +++ b/llvm/test/Bitcode/compatibility-6.0.ll @@ -1660,8 +1660,8 @@ define i8** @constexpr() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1674,16 +1674,16 @@ define i8** @constexpr() { ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: 
attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } ; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index ac97f9d79e7ad..60c2e375f4c22 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1985,8 +1985,8 @@ declare void @f.allockind() allockind("alloc,uninitialized") ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1999,15 +1999,15 @@ declare void @f.allockind() allockind("alloc,uninitialized") ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: 
attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #41 = { writeonly } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #41 = { memory(write) } ; CHECK: attributes #42 = { speculatable } ; CHECK: attributes #43 = { strictfp } ; CHECK: attributes #44 = { nosanitize_coverage } diff --git a/llvm/test/Bitcode/ptest-new.ll b/llvm/test/Bitcode/ptest-new.ll index 68d53ff3385b8..952ea795d62fb 100644 --- a/llvm/test/Bitcode/ptest-new.ll +++ b/llvm/test/Bitcode/ptest-new.ll @@ -23,4 +23,4 @@ declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone ; CHECK: attributes #0 = { nounwind } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Bitcode/ptest-old.ll b/llvm/test/Bitcode/ptest-old.ll index a41afc0c39bf8..b09fac9c397b1 100644 --- 
a/llvm/test/Bitcode/ptest-old.ll +++ b/llvm/test/Bitcode/ptest-old.ll @@ -24,4 +24,4 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone ; CHECK: attributes #0 = { nounwind } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Bitcode/upgrade-frame-pointer.ll b/llvm/test/Bitcode/upgrade-frame-pointer.ll index 5251db9ec2db1..f723c33d644a8 100644 --- a/llvm/test/Bitcode/upgrade-frame-pointer.ll +++ b/llvm/test/Bitcode/upgrade-frame-pointer.ll @@ -27,7 +27,7 @@ attributes #1 = { readnone "no-frame-pointer-elim"="false" "no-frame-pointer-eli ;; Other attributes (e.g. readnone) are unaffected. ; CHECK: attributes #0 = { "frame-pointer"="all" } -; CHECK: attributes #1 = { readnone "frame-pointer"="all" } +; CHECK: attributes #1 = { memory(none) "frame-pointer"="all" } ; CHECK: attributes #2 = { "frame-pointer"="non-leaf" } -; CHECK: attributes #3 = { readnone "frame-pointer"="non-leaf" } +; CHECK: attributes #3 = { memory(none) "frame-pointer"="non-leaf" } ; CHECK: attributes #4 = { "frame-pointer"="none" } diff --git a/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll b/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll index 3b4e9aed3b00b..b9e3c4fb5d6bb 100644 --- a/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll +++ b/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll @@ -13,9 +13,9 @@ define void @test(i8* %p1, i16* %p16) { ret void } -; CHECK: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) ; CHECK: declare i8* @llvm.launder.invariant.group.p0i8(i8*) -; CHECK: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind speculatable 
willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) ; CHECK: declare i16* @llvm.launder.invariant.group.p0i16(i16*) declare i8* @llvm.invariant.group.barrier(i8*) declare i8* @llvm.invariant.group.barrier.p0i8(i8*) diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 1d1048ada8709..b3be3702b3f52 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -226,10 +226,10 @@ define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. 
-; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index b05054d8a03d5..163bd3ee063fb 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -928,7 +928,7 @@ attributes #4 = { nounwind sanitize_address } attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. 
-; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" } ; AKF_HSA: attributes #[[ATTR3]] = { nounwind } @@ -936,7 +936,7 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ; AKF_HSA: attributes #[[ATTR5]] = { nounwind sanitize_address } ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 61ba99bc16f7d..4acf31b249590 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -642,11 +642,11 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll index 33ad439b0d977..9d8d4a1064032 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -414,10 +414,10 @@ attributes #1 = { nounwind } ; NOHSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; NOHSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. -; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } ;. 
-; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll index 68bc38bd9f4a0..222a8a26f7f82 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -6,14 +6,14 @@ ; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 { ; GCN: %mul.i = fmul float %load, 1.500000e+01 -; UNSAFE: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn 
"unsafe-fp-math"="true" } -; UNSAFE: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" } +; UNSAFE: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "unsafe-fp-math"="true" } +; UNSAFE: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" } -; NOINFS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" } -; NOINFS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" } +; NOINFS: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-infs-fp-math"="true" } +; NOINFS: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" } -; NONANS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" } -; NONANS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" } +; NONANS: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-nans-fp-math"="true" } +; NONANS: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" } define float @foo(float %x) #0 { entry: diff --git 
a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll index d9d511f72ae8e..aa3fff44b9f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll @@ -69,8 +69,8 @@ declare i64 @llvm.amdgcn.s.getpc() #0 attributes #0 = { nounwind readnone speculatable willreturn } ;. -; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ;. diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 76e1288bf7ac0..95307c84bf948 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -791,5 +791,5 @@ entry: ; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]] ; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind } -; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind readonly } +; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) } attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 9367b4fd47bbb..c28aa55e346f2 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 { attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/Feature/OperandBundles/function-attrs.ll b/llvm/test/Feature/OperandBundles/function-attrs.ll index cfb67421ebb09..1db14cc7b5383 100644 --- a/llvm/test/Feature/OperandBundles/function-attrs.ll +++ b/llvm/test/Feature/OperandBundles/function-attrs.ll @@ -43,8 +43,8 @@ define void @test_3(i32* %x) { ret void } -; CHECK: attributes #0 = { nofree readonly } -; CHECK: attributes #1 = { nofree nosync readnone } -; CHECK: attributes #2 = { writeonly } +; CHECK: attributes #0 = { nofree memory(read) } +; CHECK: attributes #1 = { nofree nosync memory(none) } +; CHECK: attributes #2 = { memory(write) } ; CHECK: attributes #3 = { nofree } ; CHECK: attributes #4 = { nofree nosync } diff --git a/llvm/test/Feature/intrinsics.ll b/llvm/test/Feature/intrinsics.ll index e7078c4a952ba..bd2c469395ae3 100644 --- a/llvm/test/Feature/intrinsics.ll +++ b/llvm/test/Feature/intrinsics.ll @@ -69,5 +69,5 @@ define void @trap() { ret void } -; CHECK: attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #1 = { cold noreturn nounwind } diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll index 0c065d6c29b0e..5cfc1f16af8fe 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll @@ -35,10 +35,10 @@ define void @store(i8* %p) { ; CHECK: declare void @__dfsan_mem_transfer_callback(i[[#SBITS]]*, i64) ; CHECK: declare void @__dfsan_cmp_callback(i[[#SBITS]]) -; CHECK: ; Function Attrs: nounwind readonly +; CHECK: ; Function Attrs: nounwind 
memory(read) ; CHECK-NEXT: declare zeroext i[[#SBITS]] @__dfsan_union_load(i[[#SBITS]]*, i64) -; CHECK: ; Function Attrs: nounwind readonly +; CHECK: ; Function Attrs: nounwind memory(read) ; CHECK-NEXT: declare zeroext i64 @__dfsan_load_label_and_origin(i8*, i64) ; CHECK: declare void @__dfsan_unimplemented(i8*) diff --git a/llvm/test/Instrumentation/MemorySanitizer/attributes.ll b/llvm/test/Instrumentation/MemorySanitizer/attributes.ll index 43452f570b28c..c2825ab3fc630 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/attributes.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/attributes.ll @@ -44,11 +44,8 @@ entry: ret void } -; CHECK-NOT: readnone -; CHECK-NOT: readonly -; CHECK-NOT: writeonly -; CHECK-NOT: argmemonly +; CHECK-NOT: memory( ; CHECK-NOT: speculatable -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) ; CHECK-NEXT: declare void @llvm.donothing diff --git a/llvm/test/Other/attribute-comment.ll b/llvm/test/Other/attribute-comment.ll index eab96e8aa9cd5..cf4076eec7c82 100644 --- a/llvm/test/Other/attribute-comment.ll +++ b/llvm/test/Other/attribute-comment.ll @@ -1,6 +1,6 @@ ; RUN: opt -S < %s | FileCheck %s -strict-whitespace -; CHECK: {{^}}; Function Attrs: nounwind readnone ssp uwtable{{$}} +; CHECK: {{^}}; Function Attrs: nounwind ssp memory(none) uwtable{{$}} ; CHECK-NEXT: define void @test1() #0 define void @test1() #0 { ret void diff --git a/llvm/test/Other/cgscc-devirt-iteration.ll b/llvm/test/Other/cgscc-devirt-iteration.ll index 70f6c1f508deb..93056e962bd82 100644 --- a/llvm/test/Other/cgscc-devirt-iteration.ll +++ b/llvm/test/Other/cgscc-devirt-iteration.ll @@ -15,7 +15,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes='default' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER2 declare void @readnone() readnone -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: 
nofree nosync memory(none) ; CHECK-NEXT: declare void @readnone() declare void @unknown() @@ -28,7 +28,7 @@ declare void @unknown() define void @test1() { ; BEFORE-NOT: Function Attrs -; AFTER: Function Attrs: nofree nosync readnone +; AFTER: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define void @test1() entry: %fptr = alloca void ()* @@ -51,13 +51,13 @@ entry: ; devirtualize again, and then deduce readnone. declare void @readnone_with_arg(void ()**) readnone -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: declare void @readnone_with_arg(void ()**) define void @test2_a(void ()** %ignore) { ; BEFORE-NOT: Function Attrs -; AFTER1: Function Attrs: nofree readonly -; AFTER2: Function Attrs: nofree nosync readnone +; AFTER1: Function Attrs: nofree memory(read) +; AFTER2: Function Attrs: nofree nosync memory(none) ; BEFORE: define void @test2_a(void ()** %ignore) ; AFTER: define void @test2_a(void ()** readnone %ignore) entry: @@ -77,8 +77,8 @@ entry: define void @test2_b() { ; BEFORE-NOT: Function Attrs -; AFTER1: Function Attrs: nofree readonly -; AFTER2: Function Attrs: nofree nosync readnone +; AFTER1: Function Attrs: nofree memory(read) +; AFTER2: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define void @test2_b() entry: %f2ptr = alloca void ()* diff --git a/llvm/test/Other/cgscc-iterate-function-mutation.ll b/llvm/test/Other/cgscc-iterate-function-mutation.ll index f4e4b889f50d6..50e0c748076eb 100644 --- a/llvm/test/Other/cgscc-iterate-function-mutation.ll +++ b/llvm/test/Other/cgscc-iterate-function-mutation.ll @@ -338,4 +338,4 @@ exit: ret void } -; CHECK: attributes #0 = { nofree nosync readnone } +; CHECK: attributes #0 = { nofree nosync memory(none) } diff --git a/llvm/test/Other/invariant.group.ll b/llvm/test/Other/invariant.group.ll index c757aff25024e..94b1bc7cd958e 100644 --- a/llvm/test/Other/invariant.group.ll +++ b/llvm/test/Other/invariant.group.ll @@ 
-91,11 +91,11 @@ declare void @use(i8* readonly) declare void @useBool(i1) declare void @clobber(i8*) -; CHECK: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn{{$}} +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite){{$}} ; CHECK-NEXT: declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.launder.invariant.group.p0i8(i8*) -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn{{$}} +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none){{$}} ; CHECK-NEXT: declare i8* @llvm.strip.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git a/llvm/test/Other/opt-override-mcpu-mattr.ll b/llvm/test/Other/opt-override-mcpu-mattr.ll index bb95bd4ce54b1..19dee8bbad346 100644 --- a/llvm/test/Other/opt-override-mcpu-mattr.ll +++ b/llvm/test/Other/opt-override-mcpu-mattr.ll @@ -4,8 +4,8 @@ ; target-cpu and target-features using command line options -mcpu and ; -mattr. 
-; CHECK: attributes #0 = { nounwind readnone ssp uwtable "target-cpu"="broadwell" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } -; CHECK: attributes #1 = { nounwind readnone ssp uwtable "target-cpu"="core2" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } +; CHECK: attributes #0 = { nounwind ssp memory(none) uwtable "target-cpu"="broadwell" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } +; CHECK: attributes #1 = { nounwind ssp memory(none) uwtable "target-cpu"="core2" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } define i32 @no_target_cpu() #0 { entry: diff --git a/llvm/test/Other/print-module-scope.ll b/llvm/test/Other/print-module-scope.ll index 4a0525dbb380b..e2dc4e965b38d 100644 --- a/llvm/test/Other/print-module-scope.ll +++ b/llvm/test/Other/print-module-scope.ll @@ -30,7 +30,7 @@ ; FOO: define void @foo ; FOO: Function Attrs: nounwind ; FOO: define void @bar -; FOO: Function Attrs: nounwind readnone ssp +; FOO: Function Attrs: nounwind ssp memory(none) ; FOO: declare void @baz define void @foo() nounwind ssp { @@ -49,6 +49,6 @@ attributes #0 = { nounwind "frame-pointer"="all" } attributes #1 = { nounwind readnone ssp "use-soft-float"="false" } ; FOO: attributes #{{[0-9]}} = { nounwind "frame-pointer"="all" } -; FOO: attributes #{{[0-9]}} = { nounwind readnone ssp "use-soft-float"="false" } +; FOO: attributes #{{[0-9]}} = { nounwind ssp memory(none) "use-soft-float"="false" } ; FOO-NOT: IR Dump After {{Simplify the CFG|SimplifyCFGPass}} diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll index d7f1dd125db4e..e1941d2aa75af 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -3,7 +3,7 
@@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal i32 @deref(i32* %x) nounwind { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@deref ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -18,7 +18,7 @@ entry: } define i32 @f(i32 %x) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f ; TUNIT-SAME: (i32 returned [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -26,7 +26,7 @@ define i32 @f(i32 %x) { ; TUNIT-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f ; CGSCC-SAME: (i32 [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -42,9 +42,9 @@ entry: ret i32 %tmp1 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll index 7d374bfe835a1..553ba1c633de8 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -7,7 +7,7 @@ ; because there is a load of %A in the entry block define internal i32 @callee(i1 %C, i32* %A) { ; -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -36,13 +36,13 @@ F: } define i32 @foo(i32* %A) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i32* nocapture nofree readonly [[A:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[X:%.*]] = call i32 @callee(i32* nocapture nofree readonly align 4 [[A]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @callee(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]]) #[[ATTR2:[0-9]+]] @@ -53,10 +53,10 @@ define i32 @foo(i32* %A) { } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll index d4f6c99543433..87f38af7f8a8d 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@hash ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -14,13 +14,13 @@ entry: } define void @encode(i32* %m, i32* %ts, i32* %new) nounwind { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@encode ; TUNIT-SAME: (i32* nocapture nofree readnone [[M:%.*]], i32* nocapture nofree readnone [[TS:%.*]], i32* nocapture nofree readnone [[NEW:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@encode ; CGSCC-SAME: (i32* nocapture nofree readnone [[M:%.*]], i32* nocapture nofree readnone [[TS:%.*]], i32* nocapture nofree readnone [[NEW:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -31,8 +31,8 @@ entry: unreachable } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree noreturn nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree noreturn nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll index f83383ffb2943..be4d00168b773 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@term_SharingList ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -25,7 +25,7 @@ bb5: ; preds = %entry } define i32 @term_Sharing(i32* %Term) nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@term_Sharing ; CHECK-SAME: (i32* nocapture nofree readnone [[TERM:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -46,5 +46,5 @@ bb14: ; preds = %entry ret i32 0 } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index 9c4839bd22293..59c3fcecb7bcf 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2 ; CHECK-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: @@ -22,7 +22,7 @@ bb: } define void @no_promote(<4 x i64>* %arg) #1 { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@no_promote ; TUNIT-SAME: (<4 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -35,7 +35,7 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; TUNIT-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@no_promote ; CGSCC-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -60,7 +60,7 @@ bb: } define internal fastcc 
void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@promote_avx2 ; CHECK-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: bb: @@ -77,7 +77,7 @@ bb: } define void @promote(<4 x i64>* %arg) #0 { -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@promote ; TUNIT-SAME: (<4 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: bb: @@ -91,7 +91,7 @@ define void @promote(<4 x i64>* %arg) #0 { ; TUNIT-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@promote ; CGSCC-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -123,15 +123,15 @@ attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2 attributes #1 = { nounwind uwtable } attributes #2 = { argmemonly nounwind } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "target-features"="+avx2" } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR3]] = { willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "target-features"="+avx2" } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR3]] = { willreturn } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "target-features"="+avx2" } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "target-features"="+avx2" } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR3]] = { willreturn } ; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 5cf7340a2f78b..660deb3cd8015 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu" ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: @@ -27,7 +27,7 @@ bb: define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: bb: @@ -41,7 +41,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn 
memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -69,7 +69,7 @@ bb: ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: bb: @@ -87,7 +87,7 @@ bb: define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -101,7 +101,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture 
nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: bb: @@ -129,7 +129,7 @@ bb: ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: bb: @@ -147,7 +147,7 @@ bb: define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: bb: @@ -161,7 +161,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -189,7 +189,7 @@ bb: ; This should promote define internal 
fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: bb: @@ -207,7 +207,7 @@ bb: define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -221,7 +221,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: bb: @@ -249,7 +249,7 @@ bb: ; This should not promote define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; -; CHECK: Function Attrs: 
argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: bb: @@ -265,7 +265,7 @@ bb: define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -278,7 +278,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -305,7 +305,7 @@ bb: ; This should not promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn 
uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: bb: @@ -321,7 +321,7 @@ bb: define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -334,7 +334,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: bb: @@ -361,7 +361,7 @@ bb: ; This should promote define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind 
willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: bb: @@ -379,7 +379,7 @@ bb: define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: bb: @@ -393,7 +393,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: bb: @@ -421,7 +421,7 @@ bb: ; This should promote define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 ; CHECK-SAME: (<8 x i64>* noalias 
nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: bb: @@ -439,7 +439,7 @@ bb: define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: bb: @@ -453,7 +453,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: bb: @@ -488,19 +488,19 @@ attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" } attributes #5 = { argmemonly nounwind } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; TUNIT: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; TUNIT: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; TUNIT: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; TUNIT: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5]] = { willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; TUNIT: attributes #[[ATTR1]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; TUNIT: attributes #[[ATTR2]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; TUNIT: attributes #[[ATTR3]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR5]] = { willreturn } ; TUNIT: attributes 
#[[ATTR6]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR5]] = { willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR1]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR2]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR3]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CGSCC: 
attributes #[[ATTR5]] = { willreturn } ; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll index d08c230e79607..84aac94f429af 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll @@ -9,7 +9,7 @@ ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = constant [[T:%.*]] { i32 0, i32 0, i32 17, i32 25 } ;. define internal i32 @test(%T* %p) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -26,13 +26,13 @@ entry: define i32 @caller() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 42 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -44,9 +44,9 @@ entry: ret i32 %v } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll index 0d3e26e4b3aeb..31fc10745c1c6 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -40,7 +40,7 @@ declare void @z(i32) ; Test2 ; Different alignemnt privatizable arguments define internal i32 @test(i32* %X, i64* %Y) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[Y_PRIV:%.*]] = alloca i64, align 8 @@ -69,7 +69,7 @@ Return2: } define internal i32 @caller(i32* %A) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[A_PRIV:%.*]] = alloca i32, align 4 @@ -84,13 +84,13 @@ define internal i32 @caller(i32* %A) { } define i32 @callercaller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; 
TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 3 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @caller(i32 noundef 2) #[[ATTR4:[0-9]+]] @@ -102,11 +102,11 @@ define i32 @callercaller() { ret i32 %X } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { willreturn memory(read) } ; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll index 26d738a59469f..13c4ecba9cc66 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -7,7 +7,7 @@ ; Don't drop 'byval' on %X here. 
define internal i32 @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X, i32 %i) nounwind { ; -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -43,7 +43,7 @@ entry: ; Also make sure we don't drop the call zeroext attribute. define i32 @test(i32* %X) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test ; TUNIT-SAME: (i32* nocapture nofree readonly [[X:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -59,7 +59,7 @@ define i32 @test(i32* %X) { ; TUNIT-NEXT: [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[C]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -82,10 +82,10 @@ entry: ret i32 %c } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll index 457e53f1ab45d..e8aea37d37f95 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @test(i32* %X, i32* %Y) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[X_PRIV:%.*]] = alloca i32, align 4 @@ -21,7 +21,7 @@ define internal i32 @test(i32* %X, i32* %Y) { } define internal i32 @caller(i32* %B) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[B_PRIV:%.*]] = alloca i32, align 4 @@ -36,13 +36,13 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 3 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @caller(i32 noundef 2) #[[ATTR4:[0-9]+]] @@ -55,11 +55,11 @@ define i32 @callercaller() { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { willreturn memory(read) } ; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll index 3e15ba83fef80..e6b145c5f3902 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -5,7 +5,7 @@ %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X) nounwind { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -35,7 +35,7 @@ entry: define i32 @test(i32* %X) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test ; TUNIT-SAME: (i32* nocapture nofree readonly [[X:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -51,7 +51,7 @@ define i32 @test(i32* %X) { ; TUNIT-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -72,10 +72,10 @@ entry: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll index c14aacf6011a2..e27904ebd3113 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -7,7 +7,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 %struct.ss = type { i32, i64 } define internal i32 @f(%struct.ss* byval(%struct.ss) %b) nounwind { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -32,7 +32,7 @@ entry: define internal i32 @g(%struct.ss* byval(%struct.ss) align 32 %b) nounwind { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@g ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -57,7 +57,7 @@ entry: define i32 @main() nounwind { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -65,20 +65,20 @@ define i32 @main() 
nounwind { ; TUNIT-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 ; TUNIT-NEXT: store i32 1, i32* [[TMP1]], align 8 ; TUNIT-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; TUNIT-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* -; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 8 -; TUNIT-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1 -; TUNIT-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 8 -; TUNIT-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* -; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 32 +; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8 ; TUNIT-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1 -; TUNIT-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 32 +; TUNIT-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8 +; TUNIT-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR2:[0-9]+]] +; TUNIT-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* +; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32 +; TUNIT-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1 +; TUNIT-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32 ; TUNIT-NEXT: [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) #[[ATTR2]] ; TUNIT-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -104,11 +104,11 @@ entry: ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll index 0ed328d0a6cf5..2a6d3dc3378a1 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll @@ -11,7 +11,7 @@ ;. 
define internal i32 @test(i32** %x) { ; -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -25,14 +25,14 @@ entry: } define i32 @caller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[X:%.*]] = call i32 @test() #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -45,10 +45,10 @@ entry: } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll index bf8441ebfec2c..44ce05fd82363 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll @@ -4,7 +4,7 @@ ; Don't promote around control flow. define internal i32 @callee(i1 %C, i32* %P) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-SAME: (i1 [[C:%.*]], i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -27,14 +27,14 @@ F: } define i32 @foo(i1 %C, i32* %P) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* nocapture nofree readonly [[P]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -47,10 +47,10 @@ entry: } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll index ab2cb25792f39..0386d1fe9808f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -5,7 +5,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @callee(i1 %C, i32* %P) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@callee ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[P_PRIV:%.*]] = alloca i32, align 4 @@ -28,13 +28,13 @@ F: ; preds = %0 } define i32 @foo() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 17 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @callee(i32 noundef 17) 
#[[ATTR2:[0-9]+]] @@ -47,9 +47,9 @@ define i32 @foo() { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll index c26c758e66060..7afc19674ae34 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll @@ -7,7 +7,7 @@ ; Inlining should nuke the invoke (and any inlined calls) here even with ; argument promotion running along with it. 
define void @zot() personality i32 (...)* @wibble { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@zot ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] personality i32 (...)* @wibble { ; TUNIT-NEXT: bb: @@ -18,7 +18,7 @@ define void @zot() personality i32 (...)* @wibble { ; TUNIT: bb2: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@zot ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] personality i32 (...)* @wibble { ; CGSCC-NEXT: bb: @@ -43,13 +43,13 @@ bb2: } define internal void @hoge() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@hoge ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@hoge ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -62,7 +62,7 @@ bb: } define internal fastcc i8* @spam(i1 (i8*)* %arg) { -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@spam ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -85,7 +85,7 @@ bb: } define internal i1 @barney(i8* %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@barney ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; 
CGSCC-NEXT: bb: @@ -96,13 +96,13 @@ bb: } define i32 @test_inf_promote_caller(i32 %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_inf_promote_caller ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_caller ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -119,7 +119,7 @@ bb: } define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_callee ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: bb: @@ -137,13 +137,13 @@ bb: declare i32 @wibble(...) ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { noreturn nounwind readnone } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { noreturn nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { noreturn nounwind readnone } +; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { noreturn nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll index ad98b8a4eb562..bb62017e2aa34 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll @@ -19,13 +19,13 @@ target triple = "x86_64-unknown-linux-gnu" ;. 
define void @run() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@run ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@run ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -40,7 +40,7 @@ entry: } define internal i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -54,7 +54,7 @@ entry: } define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@UseLongDoubleSafely ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret x86_fp80 undef @@ -65,7 +65,7 @@ define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 } define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@AccessPaddingOfStruct ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i64 undef @@ -76,7 +76,7 @@ define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) %a) { } define internal i64 @CaptureAStruct(%struct.Foo* byval(%struct.Foo) %a) { -; CGSCC: Function Attrs: nofree norecurse 
noreturn nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@CaptureAStruct ; CGSCC-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -104,9 +104,9 @@ loop: br label %loop } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind readnone } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll index 30ecb4450596c..01f537744a452 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -8,7 +8,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 ; Argpromote + sroa should change this to passing the two integers by value. 
define internal i32 @f(%struct.ss* inalloca(%struct.ss) %s) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull inalloca([[STRUCT_SS:%.*]]) align 4 dereferenceable(8) [[S:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -29,7 +29,7 @@ entry: } define i32 @main() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -41,7 +41,7 @@ define i32 @main() { ; TUNIT-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull inalloca([[STRUCT_SS]]) align 4 dereferenceable(8) [[S]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -65,7 +65,7 @@ entry: ; Argpromote can't promote %a because of the icmp use. 
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca(%struct.ss) %b) nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@g ; CGSCC-SAME: (%struct.ss* noalias nocapture nofree nonnull readnone align 4 dereferenceable(8) [[A:%.*]], %struct.ss* noalias nocapture nofree nonnull writeonly inalloca([[STRUCT_SS:%.*]]) align 4 dereferenceable(8) [[B:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -77,13 +77,13 @@ entry: } define i32 @test() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -95,12 +95,12 @@ entry: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll index c23760a146f36..4446607a08fae 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll @@ -21,7 +21,7 @@ entry: } define i32 @b() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@b ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -33,7 +33,7 @@ entry: } define i32 @c() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -46,5 +46,5 @@ entry: ret i32 %result } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index 8d383f8c9f94c..8b878da52962b 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -13,7 +13,7 @@ define internal void @dead() { } define internal i32 @test(i32* %X, i32* %Y) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] @@ -34,7 +34,7 @@ dead: } define internal i32 @caller(i32* %B) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -48,13 +48,13 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -68,11 +68,11 @@ define i32 @callercaller() { } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR4]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index a2f862cf2cf0a..c65dfa45b305e 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -13,7 +13,7 @@ define internal void @dead() { } define internal i32 @test(i32* %X, i32* %Y) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] @@ -34,14 +34,14 @@ dead: } define internal i32 @caller(i32* %B) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -55,14 +55,14 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; TUNIT: Function 
Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2]] ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -77,12 +77,12 @@ define i32 @callercaller() { } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR3]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll index e913052a81279..a23e39156e7c8 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll @@ -8,7 +8,7 @@ %T = type { i32, i32, i32, i32 } define internal i32 @test(%T* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: (%T* nocapture nofree readonly [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -27,13 +27,13 @@ define internal i32 @test(%T* %p) { } define i32 @caller(%T* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (%T* nocapture nofree readonly [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @test(%T* nocapture nofree readonly [[P]]) #[[ATTR4:[0-9]+]] ; TUNIT-NEXT: ret i32 [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (%T* nocapture nofree readonly [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @test(%T* nocapture nofree readonly [[P]]) #[[ATTR5:[0-9]+]] @@ -46,12 +46,12 @@ define i32 @caller(%T* %p) { ; Don't promote arguments of musttail caller define i32 @foo(%T* %p, i32 %v) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; 
TUNIT-SAME: (%T* nocapture nofree readnone [[P:%.*]], i32 [[V:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (%T* nocapture nofree readnone [[P:%.*]], i32 [[V:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: ret i32 0 @@ -60,7 +60,7 @@ define i32 @foo(%T* %p, i32 %v) { } define internal i32 @test2(%T* %p, i32 %p2) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test2 ; CGSCC-SAME: (%T* nocapture nofree readonly [[P:%.*]], i32 [[P2:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -68,7 +68,7 @@ define internal i32 @test2(%T* %p, i32 %p2) { ; CGSCC-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 ; CGSCC-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 ; CGSCC-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] -; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @foo(%T* undef, i32 [[V]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @foo(%T* undef, i32 [[V]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[CA]] ; %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 @@ -81,12 +81,12 @@ define internal i32 @test2(%T* %p, i32 %p2) { } define i32 @caller2(%T* %g) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller2 ; TUNIT-SAME: (%T* nocapture nofree readnone [[G:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@caller2 ; 
CGSCC-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[V:%.*]] = call noundef i32 @test2(%T* nocapture nofree readonly [[G]], i32 noundef 0) #[[ATTR5]] @@ -101,14 +101,14 @@ define i32 @caller2(%T* %g) { ; is kept as well. define i32 @bar(%T* %p, i32 %v) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@bar ; TUNIT-SAME: (%T* nocapture nofree nonnull writeonly dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: [[I32PTR:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 0 ; TUNIT-NEXT: store i32 [[V]], i32* [[I32PTR]], align 4 ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (%T* nocapture nofree nonnull writeonly dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: [[I32PTR:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 0 @@ -121,7 +121,7 @@ define i32 @bar(%T* %p, i32 %v) { } define internal i32 @test2b(%T* %p, i32 %p2) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test2b ; TUNIT-SAME: (%T* nocapture nofree readonly [[P:%.*]], i32 [[P2:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -129,10 +129,10 @@ define internal i32 @test2b(%T* %p, i32 %p2) { ; TUNIT-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 ; TUNIT-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 ; TUNIT-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] -; TUNIT-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* 
undef, i32 [[V]]) #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* undef, i32 [[V]]) #[[ATTR4]] ; TUNIT-NEXT: ret i32 [[CA]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test2b ; CGSCC-SAME: (%T* nocapture nofree readonly [[P:%.*]], i32 [[P2:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -140,7 +140,7 @@ define internal i32 @test2b(%T* %p, i32 %p2) { ; CGSCC-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 ; CGSCC-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 ; CGSCC-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] -; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* undef, i32 [[V]]) #[[ATTR7:[0-9]+]] +; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* undef, i32 [[V]]) #[[ATTR6:[0-9]+]] ; CGSCC-NEXT: ret i32 [[CA]] ; %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 @@ -153,37 +153,34 @@ define internal i32 @test2b(%T* %p, i32 %p2) { } define i32 @caller2b(%T* %g) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@caller2b ; TUNIT-SAME: (%T* nocapture nofree readonly [[G:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: [[V:%.*]] = call noundef i32 @test2b(%T* nocapture nofree readonly [[G]], i32 undef) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: [[V:%.*]] = call noundef i32 @test2b(%T* nocapture nofree readonly [[G]], i32 undef) #[[ATTR4]] ; TUNIT-NEXT: ret i32 [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@caller2b ; CGSCC-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]]) #[[ATTR4]] { -; CGSCC-NEXT: [[V:%.*]] = call noundef 
i32 @test2b(%T* nocapture nofree readonly [[G]], i32 noundef 0) #[[ATTR8:[0-9]+]] +; CGSCC-NEXT: [[V:%.*]] = call noundef i32 @test2b(%T* nocapture nofree readonly [[G]], i32 noundef 0) #[[ATTR7:[0-9]+]] ; CGSCC-NEXT: ret i32 [[V]] ; %v = call i32 @test2b(%T* %g, i32 0) ret i32 %v } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR5]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR8]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5]] = { willreturn } +; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll index c7182fbd3f5d6..bc5d660beb7f3 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -12,7 +12,7 @@ %fun_t = type void (%p_t)* define void @foo() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP:%.*]] = alloca void (i16*)*, align 8 @@ -24,7 +24,7 @@ define void @foo() { } define internal void @bar(%p_t %p) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (i16* nocapture nofree readnone [[P:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: call void @llvm.dbg.value(metadata i16* [[P]], metadata [[META3:![0-9]+]], metadata !DIExpression()) #[[ATTR2:[0-9]+]], !dbg [[DBG5:![0-9]+]] @@ -47,12 +47,12 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !5 = !DIExpression() !6 = !DILocation(line: 1, column: 1, scope: !3) ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. ; TUNIT: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) ; TUNIT: [[META1:![0-9]+]] = !DIFile(filename: "test.c", directory: "") diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll index b9e8031e5b7b2..1ddebf1fe47a2 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll @@ -7,13 +7,13 @@ target triple = "x86_64-pc-windows-msvc" define internal void @add({i32, i32}* %this, i32* sret(i32) %r) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@add ; TUNIT-SAME: ({ i32, i32 }* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: store i32 undef, i32* [[R]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@add ; CGSCC-SAME: ({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 
8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0 @@ -34,14 +34,14 @@ define internal void @add({i32, i32}* %this, i32* sret(i32) %r) { } define void @f() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: [[R:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef, i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[R:%.*]] = alloca i32, align 4 @@ -56,11 +56,11 @@ define void @f() { ret void } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll index dfa45b12b1d50..8992fb1e991a3 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll @@ -45,7 +45,7 @@ return: ; preds = %entry } define internal i32 @vfu2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind readonly { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@vfu2 ; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -86,7 +86,7 @@ define i32 @unions() nounwind { ; TUNIT-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 ; TUNIT-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i64 0, i32 1 ; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 -; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR0]] ; TUNIT-NEXT: ret i32 [[RESULT]] ; ; CGSCC: Function Attrs: nounwind @@ -110,7 +110,7 @@ entry: } define internal i32 @vfu2_v2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind readonly { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@vfu2_v2 ; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -155,7 +155,7 @@ define i32 @unions_v2() nounwind { ; TUNIT-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 ; TUNIT-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i64 0, i32 1 ; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 -; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2]] +; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR0]] ; TUNIT-NEXT: ret i32 [[RESULT]] ; ; CGSCC: Function Attrs: nounwind @@ -172,10 +172,6 @@ entry: ret i32 %result } ;. -; TUNIT: attributes #[[ATTR0]] = { nounwind } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR2]] = { nounwind readonly } -;. -; CGSCC: attributes #[[ATTR0]] = { nounwind } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; CHECK: attributes #[[ATTR0]] = { nounwind } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll index 9e8e666991c9a..206db6d74e564 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll @@ -7,13 +7,13 @@ target triple = "x86_64-unknown-linux-gnu" define i64 @fn2() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fn2 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i64 undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn2 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -31,7 +31,7 @@ entry: define i64 @fn2b(i32 %arg) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fn2b ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -39,7 +39,7 @@ define i64 @fn2b(i32 %arg) { ; TUNIT-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] ; TUNIT-NEXT: ret i64 [[DIV]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn2b ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -56,13 +56,13 @@ entry: } define i64 @fn2c() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fn2c ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i64 42 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; 
CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn2c ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -79,7 +79,7 @@ entry: } define internal i64 @fn1(i64 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn1 ; CGSCC-SAME: (i64 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -91,9 +91,9 @@ entry: ret i64 %cond } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll index a2627ecd50542..b65be6a5d511d 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll @@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @fn2(i32* %P, i1 %C) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@fn2 ; TUNIT-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -21,7 +21,7 @@ define void @fn2(i32* %P, i1 %C) { ; TUNIT: exit: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@fn2 ; CGSCC-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -54,7 +54,7 @@ exit: } define internal i32 @fn1(i32 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn1 ; CGSCC-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -116,7 +116,7 @@ exit: } define internal i32 @fn0(i32 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn0 ; CGSCC-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -128,10 +128,10 @@ entry: ret i32 %cond } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind null_pointer_is_valid } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll index 97168799d9814..79a6774836475 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll @@ -8,7 +8,7 @@ declare dso_local fastcc float @bar(%struct.wobble* noalias, <8 x i32>) unnamed_addr define %struct.zot @widget(<8 x i32> %arg) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@widget ; CHECK-SAME: (<8 x i32> [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: @@ -19,14 +19,14 @@ bb: } define void @baz(<8 x i32> %arg) local_unnamed_addr { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@baz ; TUNIT-SAME: (<8 x i32> [[ARG:%.*]]) local_unnamed_addr #[[ATTR0]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_ZOT:%.*]] undef, 0, 0 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn 
+; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@baz ; CGSCC-SAME: (<8 x i32> [[ARG:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -38,8 +38,8 @@ bb: ret void } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll index 1222ca360a3e3..8ddb79b4517e5 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll @@ -40,7 +40,7 @@ define dso_local i16 @foo(i16 %a) { ; TUNIT-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16)*)(i16 [[A]]) ; TUNIT-NEXT: ret i16 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16)*)(i16 [[A]]) @@ -51,7 +51,7 @@ define dso_local i16 @foo(i16 %a) { } define internal i16 @bar(i16 %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i16 0 @@ -66,7 
+66,7 @@ define dso_local i16 @foo2(i16 %a) { ; TUNIT-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar2 to i16 (i16)*)(i16 [[A]]) ; TUNIT-NEXT: ret i16 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@foo2 ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar2 to i16 (i16)*)(i16 [[A]]) @@ -77,7 +77,7 @@ define dso_local i16 @foo2(i16 %a) { } define internal i16 @bar2(i16 %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar2 ; CHECK-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[A:%.*]] = add i16 [[P1]], [[P2]] @@ -101,7 +101,7 @@ define dso_local i16 @vararg_tests(i16 %a) { ; TUNIT-NEXT: [[ADD:%.*]] = add i16 7, [[CALL2]] ; TUNIT-NEXT: ret i16 [[ADD]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@vararg_tests ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CALL1:%.*]] = call i16 (i16, ...) @vararg_prop(i16 noundef 7, i16 noundef 8, i16 [[A]]) #[[ATTR2:[0-9]+]] @@ -116,7 +116,7 @@ define dso_local i16 @vararg_tests(i16 %a) { } define internal i16 @vararg_prop(i16 %p1, ...) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@vararg_prop ; CGSCC-SAME: (i16 [[P1:%.*]], ...) #[[ATTR1]] { ; CGSCC-NEXT: ret i16 7 @@ -125,7 +125,7 @@ define internal i16 @vararg_prop(i16 %p1, ...) { } define internal i16 @vararg_no_prop(i16 %p1, i16 %p2, ...) 
{ -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@vararg_no_prop ; CHECK-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]], ...) #[[ATTR1]] { ; CHECK-NEXT: ret i16 7 @@ -135,9 +135,9 @@ define internal i16 @vararg_no_prop(i16 %p1, i16 %p2, ...) { ;. ; TUNIT: attributes #[[ATTR0]] = { norecurse } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll index 701aa37bf66eb..7b89d2cccf6b2 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll @@ -12,7 +12,7 @@ define dso_local i16 @foo(i16 %a) { ; TUNIT-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7) ; TUNIT-NEXT: ret i16 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7) @@ -23,7 +23,7 @@ define dso_local i16 @foo(i16 %a) { } define internal i16 @bar(i16 %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i16 [[P1:%.*]], i16 returned [[P2:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i16 [[P2]] @@ -34,8 +34,8 @@ define internal i16 @bar(i16 %p1, i16 %p2) { ;. ; TUNIT: attributes #[[ATTR0]] = { norecurse } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll b/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll index 4d6ce9cb836c3..34141fc57e1bb 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll @@ -5,7 +5,7 @@ ; See PR26774 define i32 @baz() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@baz ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 10 @@ -45,8 +45,8 @@ define i32 @bar() { ret i32 %val } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR1]] = { norecurse } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll index 43b30e6bc7de2..10c4d87f53491 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll @@ -17,7 +17,7 @@ ; CGSCC: @[[BAR_L:[a-zA-Z0-9_$"\\.-]+]] = internal constant [2 x i8*] [i8* blockaddress(@bar, [[LAB0:%.*]]), i8* blockaddress(@bar, [[END:%.*]])] ;. 
define internal void @foo(i32 %x) nounwind readnone { -; CGSCC: Function Attrs: nounwind readnone +; CGSCC: Function Attrs: nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -32,7 +32,7 @@ entry: } define internal void @bar(i32* nocapture %pc) nounwind readonly { -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (i32* nocapture [[PC:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -70,13 +70,13 @@ indirectgoto: ; preds = %lab0, %entry } define i32 @main() nounwind readnone { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -86,9 +86,9 @@ entry: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nounwind readnone } -; CGSCC: attributes #[[ATTR1]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nounwind memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll b/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll index 96fb44bd6a982..ef8005392407d 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll @@ -12,7 +12,7 @@ define internal void @foo(i32 %X) { } define void @bar() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret void @@ -20,5 +20,5 @@ define void @bar() { ret void } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll b/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll index 64cbc41ff7c53..c24c9b2f3bc80 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll @@ -5,7 +5,7 @@ target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux" define void @test(i32 signext %n) { -; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: (i32 signext [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -79,5 +79,5 @@ _ZN5boost4math4signIgEEiRKT_.exit30: ; preds = %cond.false.i28, %if } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/global.ll b/llvm/test/Transforms/Attributor/IPConstantProp/global.ll index 3c567d10d6361..0dceadd6968d0 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/global.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/global.ll @@ -8,7 +8,7 @@ ; CHECK: @[[_ZL6TEST1G:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 42, align 4 ;. define void @_Z7test1f1v() nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@_Z7test1f1v ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -32,7 +32,7 @@ if.end: ; preds = %if.then, %entry } define i32 @_Z7test1f2v() nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@_Z7test1f2v ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -43,5 +43,5 @@ entry: ret i32 %tmp } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll index 835d8df51a94b..b86c92a306cc5 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll @@ -38,7 +38,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define internal i32 @cb0(i32 %zero) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb0 ; CHECK-SAME: (i32 [[ZERO:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -49,7 +49,7 @@ entry: } define internal i32 @cb1(i32 %unknown) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb1 ; CHECK-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -60,13 +60,13 @@ entry: } define internal i32 @cb2(i32 %unknown) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cb2 ; TUNIT-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 [[UNKNOWN]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cb2 ; CGSCC-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -78,7 +78,7 @@ entry: } define internal i32 @cb3(i32 %unknown) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb3 ; 
CHECK-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -89,7 +89,7 @@ entry: } define internal i32 @cb4(i32 %unknown) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb4 ; CHECK-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -125,10 +125,10 @@ declare !callback !3 void @broker(i32 (i32)*, i32 (i32)*, i32 (i32)*, i32, i32) !2 = !{i64 2, i64 3, i1 false} !3 = !{!0, !2, !1} ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ;. 
; CHECK: [[META0:![0-9]+]] = !{!1, !2, !3} ; CHECK: [[META1:![0-9]+]] = !{i64 0, i64 3, i1 false} diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll index 6bdd396c2a6ac..539faa222c5dd 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll @@ -78,7 +78,7 @@ define internal i8* @side_effects(i8 %v) { } define internal i8* @no_side_effects(i8 %v) readonly nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@no_side_effects ; CGSCC-SAME: (i8 [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: ret i8* null @@ -96,5 +96,5 @@ define internal i8* @dont_zap_me(i8 %v) { ret i8* null } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll index 05f86d3d1b709..451ccd13d5b65 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -68,7 +68,7 @@ entry: declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*) define internal i8* @foo(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i8* noalias nocapture nofree readnone align 4294967296 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -79,7 +79,7 @@ entry: } define internal i8* @bar(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -90,7 +90,7 @@ entry: } define internal i8* @baz(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@baz ; CHECK-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -101,7 +101,7 @@ entry: } define internal i8* @buz(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@buz ; CHECK-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) 
"no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -114,7 +114,7 @@ entry: !1 = !{i64 2, i64 3, i1 false} !0 = !{!1} ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. ; CHECK: [[META0:![0-9]+]] = !{!1} ; CHECK: [[META1:![0-9]+]] = !{i64 2, i64 3, i1 false} diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll b/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll index d268731f8ddb2..ae2060ac876e0 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll @@ -5,7 +5,7 @@ ; CHECK-NOT: %X define internal i32 @foo(i32 %X) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: ret i32 undef @@ -16,12 +16,12 @@ define internal i32 @foo(i32 %X) { } define void @bar() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@bar ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret void @@ -31,7 +31,7 @@ define void @bar() { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll b/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll index 07c1c4f0296d1..ed8ebd2f02901 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll @@ -9,13 +9,13 @@ ; FIXME: Remove obsolete calls/instructions define i32 @main() noreturn nounwind { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 123 ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -28,7 +28,7 @@ entry: } define internal i32 @wwrite(i64 %i) nounwind readnone { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@wwrite ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -54,9 +54,9 @@ return: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll index 96772b1681ba9..af767ba061051 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll @@ -4,7 +4,7 @@ ;; This function returns its second argument on all return statements define internal i32* @incdec(i1 %C, i32* %V) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@incdec ; TUNIT-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -13,7 +13,7 @@ define internal i32* @incdec(i1 %C, i32* %V) { ; TUNIT: F: ; TUNIT-NEXT: ret i32* [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@incdec ; CGSCC-SAME: (i1 [[C:%.*]], i32* nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 @@ -44,7 +44,7 @@ F: ; preds = %0 ;; This function 
returns its first argument as a part of a multiple return ;; value define internal { i32, i32 } @foo(i32 %A, i32 %B) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = add i32 [[A]], [[B]] @@ -59,7 +59,7 @@ define internal { i32, i32 } @foo(i32 %A, i32 %B) { } define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { ; TUNIT-NEXT: [[Q:%.*]] = alloca i32, align 4 @@ -79,7 +79,7 @@ define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; CGSCC-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[Q]]) #[[ATTR3:[0-9]+]] ; CGSCC-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR4:[0-9]+]] ; CGSCC-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 -; CGSCC-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR5:[0-9]+]] +; CGSCC-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR3]] ; CGSCC-NEXT: br label [[OK:%.*]] ; CGSCC: OK: ; CGSCC-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 @@ -118,14 +118,13 @@ RET: declare i32 @__gxx_personality_v0(...) ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR3]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR5]] = { nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR4]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll index ea4bcd791ac97..604af5031c656 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll @@ -5,7 +5,7 @@ ; FIXME: icmp folding is missing define i1 @invokecaller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@invokecaller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { ; TUNIT-NEXT: [[X:%.*]] = call i32 @foo(i1 [[C]]) #[[ATTR1:[0-9]+]] @@ -15,7 +15,7 @@ define i1 @invokecaller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; TUNIT: FAIL: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@invokecaller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { ; CGSCC-NEXT: [[X:%.*]] = call i32 @foo(i1 [[C]]) #[[ATTR2:[0-9]+]] @@ -37,7 +37,7 @@ FAIL: } define internal i32 @foo(i1 %C) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -46,7 +46,7 @@ define internal i32 @foo(i1 %C) { ; TUNIT: F: ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i1 [[C:%.*]]) 
#[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -65,12 +65,12 @@ F: ; preds = %0 } define i1 @caller(i1 %C) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @foo(i1 [[C]]) #[[ATTR3:[0-9]+]] @@ -84,11 +84,11 @@ define i1 @caller(i1 %C) { declare i32 @__gxx_personality_v0(...) ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nounwind readnone } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nounwind } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR3]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll index 5bde8ff8bc66e..4327e6d0e9f42 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll @@ -7,7 +7,7 @@ %0 = type { i32, i32 } define internal %0 @foo(i1 %Q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i1 [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: br i1 [[Q]], label [[T:%.*]], label [[F:%.*]] @@ -34,7 +34,7 @@ F: ; preds = %0 } define internal %0 @bar(i1 %Q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i1 [[Q:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[A:%.*]] = insertvalue [[TMP0:%.*]] undef, i32 21, 0 @@ -59,13 +59,13 @@ F: ; preds = %0 } define %0 @caller(i1 %Q) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i1 [[Q:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret [[TMP0]] [[X]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i1 [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR2:[0-9]+]] @@ -84,7 +84,7 @@ define %0 @caller(i1 %Q) { ; Similar to @caller but the result of both calls are actually used. 
define i32 @caller2(i1 %Q) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller2 ; TUNIT-SAME: (i1 [[Q:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR1]] @@ -98,7 +98,7 @@ define i32 @caller2(i1 %Q) { ; TUNIT-NEXT: [[R:%.*]] = add i32 [[N]], [[M]] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller2 ; CGSCC-SAME: (i1 [[Q:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR2]] @@ -125,10 +125,10 @@ define i32 @caller2(i1 %Q) { ret i32 %R } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll b/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll index 343c4c8f7a737..6921904488dd7 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal i32 @testf(i1 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@testf ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -29,7 +29,7 @@ if.end: ; preds = %if.then1, %entry } define internal i32 @test1(i1 %c) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -59,12 +59,12 @@ ret2: ; preds = %if.then, %entry } define i32 @main(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: ret i32 99 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RES:%.*]] = call noundef i32 @test1(i1 [[C]]) #[[ATTR2]] @@ -74,9 +74,9 @@ define i32 @main(i1 %c) { ret i32 %res } 
;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll index 6c10fe2d77228..f7d145d49ae5e 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll @@ -26,7 +26,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[GSH:[a-zA-Z0-9_$"\\.-]+]] = dso_local global i32 0, align 4 ;. define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -63,7 +63,7 @@ declare !callback !0 dso_local void @broker(i32*, i32 (i32*, i32*)*, i32*) !1 = !{i64 1, i64 0, i64 2, i1 false} !0 = !{!1} ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readonly willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(read) } ;. 
; CHECK: [[META0:![0-9]+]] = !{!1} ; CHECK: [[META1:![0-9]+]] = !{i64 1, i64 0, i64 2, i1 false} diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index 3c86c78e4f3d4..5ffa951ab4766 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -17,7 +17,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i8 0, align 32 ;. define i32* @test1(i32* align 8 %0) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32* nofree readnone returned align 8 "no-capture-maybe-returned" [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32* [[TMP0]] @@ -27,7 +27,7 @@ define i32* @test1(i32* align 8 %0) #0 { ; TEST 2 define i32* @test2(i32* %0) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i32* [[TMP0]] @@ -37,7 +37,7 @@ define i32* @test2(i32* %0) #0 { ; TEST 3 define i32* @test3(i32* align 8 %0, i32* align 4 %1, i1 %2) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test3 ; CHECK-SAME: (i32* nofree readnone align 8 "no-capture-maybe-returned" [[TMP0:%.*]], i32* nofree readnone align 4 "no-capture-maybe-returned" [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = select i1 [[TMP2]], i32* [[TMP0]], i32* [[TMP1]] @@ -49,7 +49,7 
@@ define i32* @test3(i32* align 8 %0, i32* align 4 %1, i1 %2) #0 { ; TEST 4 define i32* @test4(i32* align 32 %0, i32* align 32 %1, i1 %2) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test4 ; CHECK-SAME: (i32* nofree readnone align 32 "no-capture-maybe-returned" [[TMP0:%.*]], i32* nofree readnone align 32 "no-capture-maybe-returned" [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = select i1 [[TMP2]], i32* [[TMP0]], i32* [[TMP1]] @@ -85,12 +85,12 @@ define i32* @test5_2() { ; TEST 6 ; SCC define i32* @test6_1() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test6_1 ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test6_1 ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32* undef @@ -100,12 +100,12 @@ define i32* @test6_1() #0 { } define i32* @test6_2() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test6_2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test6_2 ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32* undef @@ -134,7 +134,7 @@ define i32* 
@test6_2() #0 { ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@f1 ; CHECK-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: br label [[TMP3:%.*]] @@ -192,7 +192,7 @@ define internal i8* @f2(i8* readnone %0) local_unnamed_addr #0 { ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f3 ; CGSCC-SAME: () local_unnamed_addr #[[ATTR0]] { ; CGSCC-NEXT: br label [[TMP2:%.*]] @@ -216,13 +216,13 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { ; TEST 7 ; Better than IR information define align 4 i8* @test7() #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test7 ; TUNIT-SAME: () #[[ATTR0]] { -; TUNIT-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: ret i8* [[C]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind 
willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test7 ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[C:%.*]] = tail call noundef nonnull align 8 dereferenceable(1) i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) @a1) #[[ATTR13:[0-9]+]] @@ -235,7 +235,7 @@ define align 4 i8* @test7() #0 { ; TEST 7b ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f1b ; CGSCC-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CGSCC-NEXT: br label [[TMP3:%.*]] @@ -296,7 +296,7 @@ define internal i8* @f2b(i8* readnone %0) local_unnamed_addr #0 { ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f3b ; CGSCC-SAME: () local_unnamed_addr #[[ATTR0]] { ; CGSCC-NEXT: br label [[TMP2:%.*]] @@ -318,12 +318,12 @@ define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { } define align 4 i32* @test7b(i32* align 32 %p) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test7b ; TUNIT-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i32* [[P]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn 
uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test7b ; CGSCC-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: ret i32* [[P]] @@ -525,14 +525,14 @@ e: define i64 @test11(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test11 ; TUNIT-SAME: (i32* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[P:%.*]]) #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* ; TUNIT-NEXT: [[RET:%.*]] = load i64, i64* [[P_CAST]], align 8 ; TUNIT-NEXT: ret i64 [[RET]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test11 ; CGSCC-SAME: (i32* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[P:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -549,7 +549,7 @@ define i64 @test11(i32* %p) { ; FXIME: %p should have nonnull define i64 @test12-1(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test12-1 ; TUNIT-SAME: (i32* nocapture nofree readonly align 16 [[P:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -558,7 +558,7 @@ define i64 @test12-1(i32* align 4 %p) { ; TUNIT-NEXT: [[RET:%.*]] = load i64, i64* [[ARRAYIDX1]], align 16 ; TUNIT-NEXT: ret i64 [[RET]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test12-1 ; CGSCC-SAME: (i32* nocapture nofree readonly align 16 [[P:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -575,7 +575,7 @@ define i64 @test12-1(i32* align 4 %p) { } define i64 @test12-2(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test12-2 ; TUNIT-SAME: (i32* nocapture nofree nonnull readonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -583,7 +583,7 @@ define i64 @test12-2(i32* align 4 %p) { ; TUNIT-NEXT: [[RET:%.*]] = load i64, i64* [[ARRAYIDX0]], align 16 ; TUNIT-NEXT: ret i64 [[RET]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test12-2 ; CGSCC-SAME: (i32* nocapture nofree nonnull readonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -599,7 +599,7 @@ define i64 @test12-2(i32* align 4 %p) { ; FXIME: %p should have nonnull define void @test12-3(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test12-3 ; TUNIT-SAME: (i32* nocapture nofree writeonly align 16 [[P:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -608,7 +608,7 @@ define void @test12-3(i32* align 4 %p) { ; TUNIT-NEXT: store i64 0, i64* [[ARRAYIDX1]], align 16 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test12-3 ; CGSCC-SAME: (i32* nocapture nofree writeonly align 16 [[P:%.*]]) #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -625,7 +625,7 @@ define void @test12-3(i32* align 4 %p) { } define void @test12-4(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test12-4 ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR5]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -633,7 +633,7 @@ define void @test12-4(i32* align 4 %p) { ; TUNIT-NEXT: store i64 0, i64* [[ARRAYIDX0]], align 16 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test12-4 ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -699,7 +699,7 @@ define void @test12-6(i32* align 4 %p) { } define void @test13(i1 %c, i32* align 32 %dst) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test13 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -712,7 +712,7 @@ define void @test13(i1 %c, i32* align 32 %dst) #0 { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: Function 
Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test13 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -737,7 +737,7 @@ end: } define void @test13-1(i1 %c, i32* align 32 %dst) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test13-1 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -750,7 +750,7 @@ define void @test13-1(i1 %c, i32* align 32 %dst) { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 16 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test13-1 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -775,7 +775,7 @@ end: } define void @test13-2(i1 %c, i32* align 32 %dst) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test13-2 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -788,7 +788,7 @@ define void @test13-2(i1 %c, i32* align 32 %dst) { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: 
Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test13-2 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -813,7 +813,7 @@ end: } define void @test13-3(i1 %c, i32* align 32 %dst) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test13-3 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -826,7 +826,7 @@ define void @test13-3(i1 %c, i32* align 32 %dst) { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test13-3 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -852,13 +852,13 @@ end: ; Don't crash on ptr2int/int2ptr uses. 
define i64 @ptr2int(i32* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ptr2int -; TUNIT-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR9]] { +; TUNIT-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[P2I:%.*]] = ptrtoint i32* [[P]] to i64 ; TUNIT-NEXT: ret i64 [[P2I]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ptr2int ; CGSCC-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[P2I:%.*]] = ptrtoint i32* [[P]] to i64 @@ -868,13 +868,13 @@ define i64 @ptr2int(i32* %p) { ret i64 %p2i } define i64* @int2ptr(i64 %i) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@int2ptr ; TUNIT-SAME: (i64 [[I:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[I2P:%.*]] = inttoptr i64 [[I]] to i64* ; TUNIT-NEXT: ret i64* [[I2P]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@int2ptr ; CGSCC-SAME: (i64 [[I:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[I2P:%.*]] = inttoptr i64 [[I]] to i64* @@ -886,13 +886,13 @@ define i64* @int2ptr(i64 %i) { ; Use the store alignment only for the pointer operand. 
define void @aligned_store(i8* %Value, i8** %Ptr) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@aligned_store ; TUNIT-SAME: (i8* nofree writeonly [[VALUE:%.*]], i8** nocapture nofree noundef nonnull writeonly align 32 dereferenceable(8) [[PTR:%.*]]) #[[ATTR5]] { ; TUNIT-NEXT: store i8* [[VALUE]], i8** [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@aligned_store ; CGSCC-SAME: (i8* nofree writeonly [[VALUE:%.*]], i8** nocapture nofree noundef nonnull writeonly align 32 dereferenceable(8) [[PTR:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: store i8* [[VALUE]], i8** [[PTR]], align 32 @@ -916,14 +916,14 @@ define void @align_call_op_not_store(i8* align 2048 %arg) { } define void @align_store_after_bc(i32* align 2048 %arg) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@align_store_after_bc ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly align 2048 dereferenceable(1) [[ARG:%.*]]) #[[ATTR5]] { ; TUNIT-NEXT: [[BC:%.*]] = bitcast i32* [[ARG]] to i8* ; TUNIT-NEXT: store i8 0, i8* [[BC]], align 2048 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@align_store_after_bc ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly align 2048 dereferenceable(1) [[ARG:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: [[BC:%.*]] = bitcast i32* [[ARG]] to i8* @@ -939,13 +939,13 
@@ define void @align_store_after_bc(i32* align 2048 %arg) { ; we cannot also put on the caller. @cnd = external global i1 define i32 @musttail_callee_1(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@musttail_callee_1 ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 32 ; TUNIT-NEXT: ret i32 [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@musttail_callee_1 ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 32 @@ -955,24 +955,24 @@ define i32 @musttail_callee_1(i32* %p) { ret i32 %v } define i32 @musttail_caller_1(i32* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@musttail_caller_1 ; TUNIT-SAME: (i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR10:[0-9]+]] { ; TUNIT-NEXT: [[C:%.*]] = load i1, i1* @cnd, align 1 ; TUNIT-NEXT: br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]] ; TUNIT: mt: -; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree readonly [[P]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree readonly [[P]]) #[[ATTR12:[0-9]+]] ; TUNIT-NEXT: ret i32 [[V]] ; TUNIT: exit: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define 
{{[^@]+}}@musttail_caller_1 ; CGSCC-SAME: (i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: [[C:%.*]] = load i1, i1* @cnd, align 1 ; CGSCC-NEXT: br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]] ; CGSCC: mt: -; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: ret i32 [[V]] ; CGSCC: exit: ; CGSCC-NEXT: ret i32 0 @@ -1051,7 +1051,7 @@ declare void @align4_callee(i8* align(4) %p) @G = global i8 0, align 32 define internal i8* @aligned_8_return(i8* %a, i1 %c1, i1 %c2) norecurse { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@aligned_8_return ; TUNIT-SAME: (i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i8*, align 8 @@ -1068,7 +1068,7 @@ define internal i8* @aligned_8_return(i8* %a, i1 %c1, i1 %c2) norecurse { ; TUNIT-NEXT: [[L:%.*]] = load i8*, i8** [[STACK]], align 8 ; TUNIT-NEXT: ret i8* [[L]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@aligned_8_return ; CGSCC-SAME: (i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i8*, align 8 @@ -1101,13 +1101,13 @@ end: } define i8* @aligned_8_return_caller(i8* align(16) %a, i1 %c1, i1 %c2) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@aligned_8_return_caller ; TUNIT-SAME: (i8* nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR9]] { -; TUNIT-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR12]] ; TUNIT-NEXT: ret i8* [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@aligned_8_return_caller ; CGSCC-SAME: (i8* nofree readnone align 16 [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR12:[0-9]+]] { ; CGSCC-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR13]] @@ -1121,33 +1121,32 @@ attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } attributes #2 = { null_pointer_is_valid } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; TUNIT: attributes #[[ATTR2]] = { nounwind } ; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } -; TUNIT: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ; TUNIT: attributes #[[ATTR6]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR7]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR11]] = { 
nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } ; CGSCC: attributes #[[ATTR1]] = { noinline nounwind uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; CGSCC: attributes #[[ATTR3]] = { nounwind } ; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR8]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR13]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR14]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR8]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: 
attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR13]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/allow_list.ll b/llvm/test/Transforms/Attributor/allow_list.ll index 65650d174e3c1..31cb6bbc66281 100644 --- a/llvm/test/Transforms/Attributor/allow_list.ll +++ b/llvm/test/Transforms/Attributor/allow_list.ll @@ -35,7 +35,7 @@ define internal i32 @range_test(i32 %a) #0 { ; CHECK_DISABLED_FUNCTION-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 ; CHECK_DISABLED_FUNCTION-NEXT: ret i32 [[TMP2]] ; -; CHECK_ENABLED_FUNCTION: Function Attrs: noinline nounwind readnone uwtable +; CHECK_ENABLED_FUNCTION: Function Attrs: noinline nounwind memory(none) uwtable ; CHECK_ENABLED_FUNCTION-LABEL: define {{[^@]+}}@range_test ; CHECK_ENABLED_FUNCTION-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK_ENABLED_FUNCTION-NEXT: ret i32 1 @@ -65,7 +65,7 @@ define i32 @range_use1() #0 { ; CHECK_DISABLED_FUNCTION-NEXT: [[TMP1:%.*]] = call i32 @range_test(i32 123) ; CHECK_DISABLED_FUNCTION-NEXT: ret i32 [[TMP1]] ; -; CHECK_ENABLED_FUNCTION: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK_ENABLED_FUNCTION: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK_ENABLED_FUNCTION-LABEL: define {{[^@]+}}@range_use1 ; CHECK_ENABLED_FUNCTION-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK_ENABLED_FUNCTION-NEXT: ret i32 1 @@ -112,7 +112,7 @@ attributes #0 = { nounwind uwtable noinline } ;. ; CHECK_DISABLED_FUNCTION: attributes #[[ATTR0]] = { noinline nounwind uwtable } ;. 
-; CHECK_ENABLED_FUNCTION: attributes #[[ATTR0]] = { noinline nounwind readnone uwtable } -; CHECK_ENABLED_FUNCTION: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } +; CHECK_ENABLED_FUNCTION: attributes #[[ATTR0]] = { noinline nounwind memory(none) uwtable } +; CHECK_ENABLED_FUNCTION: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } ; CHECK_ENABLED_FUNCTION: attributes #[[ATTR2]] = { noinline nounwind uwtable } ;. diff --git a/llvm/test/Transforms/Attributor/alwaysinline.ll b/llvm/test/Transforms/Attributor/alwaysinline.ll index 72847475b68d2..e1602a38a9f6f 100644 --- a/llvm/test/Transforms/Attributor/alwaysinline.ll +++ b/llvm/test/Transforms/Attributor/alwaysinline.ll @@ -8,7 +8,7 @@ ; the function is not exactly defined, and marked alwaysinline and can be inlined, ; so the function can be analyzed define linkonce void @inner1() alwaysinline { -; CHECK: Function Attrs: alwaysinline nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: alwaysinline nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@inner1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -19,13 +19,13 @@ entry: } define void @outer1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@outer1 ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@outer1 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -122,12 +122,12 @@ define i32 @outer3(i32 %x) { ret i32 %call } ;. 
-; TUNIT: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR2]] = { norecurse } ; TUNIT: attributes #[[ATTR3]] = { alwaysinline } ;. -; CGSCC: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { alwaysinline } ;. diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll index e75489a03ceac..2119961d37a12 100644 --- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll @@ -4,7 +4,7 @@ ; define internal i8 @read_arg(i8* %p) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@read_arg ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -17,14 +17,14 @@ entry: } define internal i8 @read_arg_index(i8* %p, i64 %index) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@read_arg_index ; TUNIT-SAME: (i8* 
nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 2 ; TUNIT-NEXT: ret i8 [[L]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@read_arg_index ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -38,7 +38,7 @@ entry: } define i8 @call_simplifiable_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1 ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -46,7 +46,7 @@ define i8 @call_simplifiable_1() { ; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; TUNIT-NEXT: ret i8 2 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_1 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -65,7 +65,7 @@ entry: } define i8 @call_not_simplifiable_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -75,7 +75,7 @@ define i8 @call_not_simplifiable_1() { ; TUNIT-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: 
nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_1 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -94,11 +94,11 @@ entry: } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll b/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll index 16c57b1a1e8c6..d74fef2a0b373 100644 --- a/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll +++ b/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll @@ -192,10 +192,10 @@ attributes #0 = { noinline nounwind sspstrong uwtable} ; TUNIT_: !0 = !{i32 0, i32 101} ; TUNIT_: !1 = !{i32 100, i32 201} ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll b/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll index 3f26a19cb0825..54c61aace5260 100644 --- a/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll +++ b/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll @@ -195,10 +195,10 @@ attributes #0 = { noinline nounwind sspstrong uwtable} ; TUNIT_: !0 = !{i32 0, i32 101} ; TUNIT_: !1 = !{i32 100, i32 201} ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/cb_range_disabled.ll b/llvm/test/Transforms/Attributor/cb_range_disabled.ll index e3771fd70104f..9463b3d840138 100644 --- a/llvm/test/Transforms/Attributor/cb_range_disabled.ll +++ b/llvm/test/Transforms/Attributor/cb_range_disabled.ll @@ -141,10 +141,10 @@ define i32 @test2_ncheck(i32 %unknown) { ret i32 %3 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/cb_range_enabled.ll b/llvm/test/Transforms/Attributor/cb_range_enabled.ll index 674e45e0f1aaa..42663300eacf6 100644 --- a/llvm/test/Transforms/Attributor/cb_range_enabled.ll +++ b/llvm/test/Transforms/Attributor/cb_range_enabled.ll @@ -145,10 +145,10 @@ define i32 @test2_ncheck(i32 %unknown) { ret i32 %3 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll index 48d11a76e313e..81b13489703c2 100644 --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -14,7 +14,7 @@ ; } ; define i32* @checkAndAdvance(i32* align 16 %0) { -; CHECK: Function Attrs: argmemonly nofree nosync nounwind readonly +; CHECK: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@checkAndAdvance ; CHECK-SAME: (i32* nofree noundef nonnull readonly align 16 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 16 @@ -380,6 +380,6 @@ define i32* @checkAndAdvance(i32* align 16 %0) { ; DOT-DAG: Node[[Node44]] -> Node[[Node43]]; ; DOT-DAG: Node[[Node43]] -> Node[[Node44]]; ;. -; CHECK: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly } -; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readonly } +; CHECK: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: read) } +; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index fee64076d90f2..ec3a106d5fa19 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -12,7 +12,7 @@ declare void @deref_phi_user(i32* %a); ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i64 0 ;. 
define i32* @test1(i32* dereferenceable(4) %0, double* dereferenceable(8) %1, i1 zeroext %2) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32* nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0:%.*]], double* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1:%.*]], i1 zeroext [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP1]] to i32* @@ -26,7 +26,7 @@ define i32* @test1(i32* dereferenceable(4) %0, double* dereferenceable(8) %1, i1 ; TEST 2 define i32* @test2(i32* dereferenceable_or_null(4) %0, double* dereferenceable(8) %1, i1 zeroext %2) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP0:%.*]], double* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1:%.*]], i1 zeroext [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP1]] to i32* @@ -41,7 +41,7 @@ define i32* @test2(i32* dereferenceable_or_null(4) %0, double* dereferenceable(8 ; TEST 3 ; GEP inbounds define i32* @test3_1(i32* dereferenceable(8) %0) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3_1 ; CHECK-SAME: (i32* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 1 @@ 
-52,7 +52,7 @@ define i32* @test3_1(i32* dereferenceable(8) %0) local_unnamed_addr { } define i32* @test3_2(i32* dereferenceable_or_null(32) %0) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3_2 ; CHECK-SAME: (i32* nofree readnone dereferenceable_or_null(32) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4 @@ -63,7 +63,7 @@ define i32* @test3_2(i32* dereferenceable_or_null(32) %0) local_unnamed_addr { } define i32* @test3_3(i32* dereferenceable(8) %0, i32* dereferenceable(16) %1, i1 %2) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3_3 ; CHECK-SAME: (i32* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0:%.*]], i32* nofree nonnull readnone dereferenceable(16) "no-capture-maybe-returned" [[TMP1:%.*]], i1 [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[RET1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 1 @@ -81,7 +81,7 @@ define i32* @test3_3(i32* dereferenceable(8) %0, i32* dereferenceable(16) %1, i1 ; Better than known in IR. 
define dereferenceable(4) i32* @test4(i32* dereferenceable(8) %0) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test4 ; CHECK-SAME: (i32* nofree nonnull readnone returned dereferenceable(8) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: ret i32* [[TMP0]] @@ -284,7 +284,7 @@ define i32* @f7_3() { ; FIXME: This should have a return dereferenceable(8) but we need to make sure it will work in loops as well. define i32* @test_for_minus_index(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test_for_minus_index ; CHECK-SAME: (i32* nofree nonnull writeonly align 4 "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 -2 @@ -297,7 +297,7 @@ define i32* @test_for_minus_index(i32* %p) { } define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@deref_or_null_and_nonnull ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(100) [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store i32 1, i32* [[TMP0]], align 4 @@ -316,7 +316,7 @@ define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) { ; FIXME: %ptr should be dereferenceable(31) define void @test8(i8* %ptr) #0 { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) 
; CHECK-LABEL: define {{[^@]+}}@test8 ; CHECK-SAME: (i8* nocapture nofree nonnull writeonly dereferenceable(21) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: br label [[TMP1:%.*]] @@ -351,7 +351,7 @@ define void @test8(i8* %ptr) #0 { ; 8.2 (negative case) define void @test8_neg(i32 %i, i8* %ptr) #0 { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test8_neg ; CHECK-SAME: (i32 [[I:%.*]], i8* nocapture nofree nonnull writeonly [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I]] to i64 @@ -374,7 +374,7 @@ define void @test8_neg(i32 %i, i8* %ptr) #0 { ; NOTE: %p should not be dereferenceable define internal void @fill_range_not_inbounds(i32* %p, i64 %start){ -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@fill_range_not_inbounds ; CHECK-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64 [[START:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -410,7 +410,7 @@ for.body: ; preds = %entry, %for.body ; FIXME: %p should be dereferenceable(40) define internal void @fill_range_inbounds(i32* %p, i64 %start){ -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@fill_range_inbounds ; CHECK-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64 [[START:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -445,7 +445,7 @@ for.body: ; preds = %entry, %for.body } define void @call_fill_range(i32* nocapture %p, i64* nocapture readonly %range) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn 
memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@call_fill_range ; TUNIT-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[RANGE:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -454,13 +454,13 @@ define void @call_fill_range(i32* nocapture %p, i64* nocapture readonly %range) ; TUNIT-NEXT: tail call void @fill_range_not_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR6]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@call_fill_range ; CGSCC-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[RANGE:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[TMP0:%.*]] = load i64, i64* [[RANGE]], align 8, !range [[RNG0:![0-9]+]] -; CGSCC-NEXT: tail call void @fill_range_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR6:[0-9]+]] -; CGSCC-NEXT: tail call void @fill_range_not_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @fill_range_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR1]] +; CGSCC-NEXT: tail call void @fill_range_not_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR1]] ; CGSCC-NEXT: ret void ; entry: @@ -563,7 +563,7 @@ cont2: ; ; FIXME: %ptr should be dereferenceable(4) define dso_local void @rec-branch-1(i32 %a, i32 %b, i32 %c, i32* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@rec-branch-1 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: 
entry: @@ -637,32 +637,59 @@ if.end8: ; preds = %if.then5, %if.else6 ; } ; FIXME: %ptr should be dereferenceable(4) define dso_local void @rec-branch-2(i32 %a, i32 %b, i32 %c, i32* %ptr) { -; CHECK: Function Attrs: argmemonly nofree nosync nounwind writeonly -; CHECK-LABEL: define {{[^@]+}}@rec-branch-2 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[B]], 0 -; CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_ELSE:%.*]], label [[IF_THEN2:%.*]] -; CHECK: if.then2: -; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[IF_END8:%.*]] -; CHECK: if.else: -; CHECK-NEXT: store i32 2, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[IF_END8]] -; CHECK: if.else3: -; CHECK-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: br i1 [[TOBOOL4]], label [[IF_ELSE6:%.*]], label [[IF_THEN5:%.*]] -; CHECK: if.then5: -; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[IF_END8]] -; CHECK: if.else6: -; CHECK-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, i32* nocapture nofree writeonly [[PTR]]) #[[ATTR7:[0-9]+]] -; CHECK-NEXT: br label [[IF_END8]] -; CHECK: if.end8: -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) +; TUNIT-LABEL: define {{[^@]+}}@rec-branch-2 +; TUNIT-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] +; TUNIT: if.then: +; TUNIT-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[B]], 0 +; TUNIT-NEXT: br i1 [[TOBOOL1]], label [[IF_ELSE:%.*]], label 
[[IF_THEN2:%.*]] +; TUNIT: if.then2: +; TUNIT-NEXT: store i32 1, i32* [[PTR]], align 4 +; TUNIT-NEXT: br label [[IF_END8:%.*]] +; TUNIT: if.else: +; TUNIT-NEXT: store i32 2, i32* [[PTR]], align 4 +; TUNIT-NEXT: br label [[IF_END8]] +; TUNIT: if.else3: +; TUNIT-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 0 +; TUNIT-NEXT: br i1 [[TOBOOL4]], label [[IF_ELSE6:%.*]], label [[IF_THEN5:%.*]] +; TUNIT: if.then5: +; TUNIT-NEXT: store i32 3, i32* [[PTR]], align 4 +; TUNIT-NEXT: br label [[IF_END8]] +; TUNIT: if.else6: +; TUNIT-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, i32* nocapture nofree writeonly [[PTR]]) #[[ATTR7:[0-9]+]] +; TUNIT-NEXT: br label [[IF_END8]] +; TUNIT: if.end8: +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) +; CGSCC-LABEL: define {{[^@]+}}@rec-branch-2 +; CGSCC-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] +; CGSCC: if.then: +; CGSCC-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[B]], 0 +; CGSCC-NEXT: br i1 [[TOBOOL1]], label [[IF_ELSE:%.*]], label [[IF_THEN2:%.*]] +; CGSCC: if.then2: +; CGSCC-NEXT: store i32 1, i32* [[PTR]], align 4 +; CGSCC-NEXT: br label [[IF_END8:%.*]] +; CGSCC: if.else: +; CGSCC-NEXT: store i32 2, i32* [[PTR]], align 4 +; CGSCC-NEXT: br label [[IF_END8]] +; CGSCC: if.else3: +; CGSCC-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 0 +; CGSCC-NEXT: br i1 [[TOBOOL4]], label [[IF_ELSE6:%.*]], label [[IF_THEN5:%.*]] +; CGSCC: if.then5: +; CGSCC-NEXT: store i32 3, i32* [[PTR]], align 4 +; CGSCC-NEXT: br label [[IF_END8]] +; CGSCC: if.else6: +; CGSCC-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, i32* nocapture nofree writeonly [[PTR]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: br label [[IF_END8]] +; CGSCC: if.end8: +; 
CGSCC-NEXT: ret void ; entry: %tobool = icmp eq i32 %a, 0 @@ -704,11 +731,17 @@ define void @nonnull_assume_pos(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ATTRIBUTOR-NEXT: call void @unknown() ; ATTRIBUTOR-NEXT: ret void ; -; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_pos -; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) [[ARG4:%.*]]) { -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR8:[0-9]+]] [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void +; TUNIT-LABEL: define {{[^@]+}}@nonnull_assume_pos +; TUNIT-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) [[ARG4:%.*]]) { +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR8:[0-9]+]] [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; TUNIT-NEXT: call void @unknown() +; TUNIT-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@nonnull_assume_pos +; CGSCC-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) 
[[ARG4:%.*]]) { +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR7:[0-9]+]] [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; CGSCC-NEXT: call void @unknown() +; CGSCC-NEXT: ret void ; call void @llvm.assume(i1 true) [ "nonnull"(i8* %arg3), "dereferenceable"(i8* %arg1, i64 1), "dereferenceable"(i8* %arg1, i64 2), "dereferenceable"(i8* %arg1, i64 101), "dereferenceable_or_null"(i8* %arg2, i64 31), "dereferenceable_or_null"(i8* %arg4, i64 42)] call void @unknown() @@ -750,23 +783,41 @@ define void @nonnull_assume_call(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ATTRIBUTOR-NEXT: call void @unknown() ; ATTRIBUTOR-NEXT: ret void ; -; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_call -; CHECK-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) { -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() #[[ATTR9:[0-9]+]] -; CHECK-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] -; CHECK-NEXT: call void 
@unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void +; TUNIT-LABEL: define {{[^@]+}}@nonnull_assume_call +; TUNIT-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) { +; TUNIT-NEXT: call void @unknown() +; TUNIT-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown() +; TUNIT-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@nonnull_assume_call +; CGSCC-SAME: (i8* [[ARG1:%.*]], i8* 
[[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) { +; CGSCC-NEXT: call void @unknown() +; CGSCC-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() #[[ATTR8:[0-9]+]] +; CGSCC-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR8]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown() +; CGSCC-NEXT: ret void ; call void @unknown() %p = call i32* @unkown_ptr() @@ -818,27 +869,26 @@ f: !0 = !{i64 10, i64 100} ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR1]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind memory(argmem: write) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR8]] = { willreturn } ; TUNIT: attributes #[[ATTR9]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR1]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR5:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR8]] = { willreturn } -; CGSCC: attributes #[[ATTR9]] = { nounwind } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind memory(argmem: write) } +; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR7]] = { willreturn } +; CGSCC: attributes #[[ATTR8]] = { nounwind } ;. ; CHECK: [[META0:![0-9]+]] = !{i64 10, i64 100} ;. 
diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll b/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll index 2f0b8986ccac5..68066817c54bf 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll @@ -6,7 +6,7 @@ ; https://bugs.llvm.org/show_bug.cgi?id=21780 define <4 x double> @PR21780(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780 ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 1 @@ -44,7 +44,7 @@ define <4 x double> @PR21780(double* %ptr) { define double @PR21780_only_access3_with_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_with_inbounds ; CHECK-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3 @@ -58,7 +58,7 @@ define double @PR21780_only_access3_with_inbounds(double* %ptr) { } define double @PR21780_only_access3_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_without_inbounds ; CHECK-SAME: (double* nocapture nofree readonly align 8 [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, 
double* [[PTR]], i64 3 @@ -71,7 +71,7 @@ define double @PR21780_only_access3_without_inbounds(double* %ptr) { } define double @PR21780_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_without_inbounds ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3 @@ -94,7 +94,7 @@ define double @PR21780_without_inbounds(double* %ptr) { ; Unsimplified, but still valid. Also, throw in some bogus arguments. define void @gep0(i8* %unused, i8* %other, i8* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@gep0 ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[OTHER:%.*]], i8* nocapture nofree nonnull readonly dereferenceable(3) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, i8* [[PTR]], i64 2 @@ -116,7 +116,7 @@ define void @gep0(i8* %unused, i8* %other, i8* %ptr) { ; Multiple arguments may be dereferenceable. define void @ordering(i8* %ptr1, i32* %ptr2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ordering ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR1:%.*]], i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: ret void @@ -137,7 +137,7 @@ define void @ordering(i8* %ptr1, i32* %ptr2) { ; Not in entry block. 
define void @not_entry_but_guaranteed_to_execute(i8* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_but_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -160,7 +160,7 @@ exit: ; Not in entry block and not guaranteed to execute. define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_not_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree readnone [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -187,7 +187,7 @@ exit: ; The last load may not execute, so derefenceable bytes only covers the 1st two loads. define void @partial_in_entry(i16* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@partial_in_entry ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(4) [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -215,7 +215,7 @@ exit: ; The 2nd and 3rd loads may never execute. 
define void @volatile_is_not_dereferenceable(i16* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable ; CHECK-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2 @@ -233,7 +233,7 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) { ; TODO: We should allow inference for atomic (but not volatile) ops. define void @atomic_is_alright(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@atomic_is_alright ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(6) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -268,7 +268,7 @@ define void @not_guaranteed_to_transfer_execution(i16* %ptr) { ; We must have consecutive accesses. 
define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@variable_gep_index ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]], i64 [[VARIABLE_INDEX:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -285,7 +285,7 @@ define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { define void @multi_index_gep(<4 x i8>* %ptr) { ; FIXME: %ptr should be dereferenceable(4) -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@multi_index_gep ; CHECK-SAME: (<4 x i8>* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -298,7 +298,7 @@ define void @multi_index_gep(<4 x i8>* %ptr) { ; Could round weird bitwidths down? define void @not_byte_multiple(i9* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_byte_multiple ; CHECK-SAME: (i9* nocapture nofree nonnull readnone align 2 dereferenceable(2) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -311,7 +311,7 @@ define void @not_byte_multiple(i9* %ptr) { ; Missing direct access from the pointer. 
define void @no_pointer_deref(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@no_pointer_deref ; CHECK-SAME: (i16* nocapture nofree readnone align 2 [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -326,7 +326,7 @@ define void @no_pointer_deref(i16* %ptr) { ; Out-of-order is ok, but missing access concludes dereferenceable range. define void @non_consecutive(i32* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@non_consecutive ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -343,7 +343,7 @@ define void @non_consecutive(i32* %ptr) { ; Improve on existing dereferenceable attribute. define void @more_bytes(i32* dereferenceable(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -362,7 +362,7 @@ define void @more_bytes(i32* dereferenceable(8) %ptr) { ; Improve on existing dereferenceable_or_null attribute. 
define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes_and_not_null ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -381,7 +381,7 @@ define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { ; But don't pessimize existing dereferenceable attribute. define void @better_bytes(i32* dereferenceable(100) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@better_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(100) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -398,7 +398,7 @@ define void @better_bytes(i32* dereferenceable(100) %ptr) { } define void @bitcast(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -412,7 +412,7 @@ define void @bitcast(i32* %arg) { } define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast_different_sizes ; CHECK-SAME: (double* nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG1:%.*]], i8* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[ARG2:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -434,7 +434,7 
@@ define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { } define void @negative_offset(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@negative_offset ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -448,7 +448,7 @@ define void @negative_offset(i32* %arg) { } define void @stores(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@stores ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -467,7 +467,7 @@ define void @stores(i32* %arg) { } define void @load_store(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@load_store ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -484,7 +484,7 @@ define void @load_store(i32* %arg) { } define void @different_size1(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double* @@ -499,7 +499,7 @@ define void @different_size1(i32* 
%arg) { } define void @different_size2(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: store i32 0, i32* [[ARG]], align 8 @@ -532,7 +532,7 @@ define void @different_size2(i32* %arg) { ; ; ATTRIBUTOR_CGSCC_NPM-LABEL: define i32 @require_cfg_analysis(i32 %c, i32* {{.*}} dereferenceable(4) %p) define i32 @require_cfg_analysis(i32 %c, i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@require_cfg_analysis ; CHECK-SAME: (i32 [[C:%.*]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0 @@ -584,9 +584,9 @@ end: ret i32 1 } ;. 
-; CHECK: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CHECK: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR3]] = { argmemonly nofree norecurse nounwind willreturn } -; CHECK: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ;. diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll index 76ed8c9f7a6e2..9c8bcfc0aed56 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-2.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll @@ -6,7 +6,7 @@ ; https://bugs.llvm.org/show_bug.cgi?id=21780 define <4 x double> @PR21780(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780 ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 1 @@ -44,7 +44,7 @@ define <4 x double> @PR21780(double* %ptr) { define double @PR21780_only_access3_with_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse 
nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_with_inbounds ; CHECK-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3 @@ -58,7 +58,7 @@ define double @PR21780_only_access3_with_inbounds(double* %ptr) { } define double @PR21780_only_access3_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_without_inbounds ; CHECK-SAME: (double* nocapture nofree readonly align 8 [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3 @@ -71,7 +71,7 @@ define double @PR21780_only_access3_without_inbounds(double* %ptr) { } define double @PR21780_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_without_inbounds ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3 @@ -94,7 +94,7 @@ define double @PR21780_without_inbounds(double* %ptr) { ; Unsimplified, but still valid. Also, throw in some bogus arguments. 
define void @gep0(i8* %unused, i8* %other, i8* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@gep0 ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[OTHER:%.*]], i8* nocapture nofree nonnull readonly dereferenceable(3) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, i8* [[PTR]], i64 2 @@ -116,7 +116,7 @@ define void @gep0(i8* %unused, i8* %other, i8* %ptr) { ; Multiple arguments may be dereferenceable. define void @ordering(i8* %ptr1, i32* %ptr2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ordering ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR1:%.*]], i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: ret void @@ -137,7 +137,7 @@ define void @ordering(i8* %ptr1, i32* %ptr2) { ; Not in entry block. define void @not_entry_but_guaranteed_to_execute(i8* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_but_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -160,7 +160,7 @@ exit: ; Not in entry block and not guaranteed to execute. 
define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_not_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree readnone [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -187,7 +187,7 @@ exit: ; The last load may not execute, so derefenceable bytes only covers the 1st two loads. define void @partial_in_entry(i16* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@partial_in_entry ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(4) [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -215,7 +215,7 @@ exit: ; The 2nd and 3rd loads may never execute. define void @volatile_is_not_dereferenceable(i16* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable ; CHECK-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2 @@ -233,7 +233,7 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) { ; TODO: We should allow inference for atomic (but not volatile) ops. 
define void @atomic_is_alright(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@atomic_is_alright ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(6) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -268,7 +268,7 @@ define void @not_guaranteed_to_transfer_execution(i16* %ptr) { ; We must have consecutive accesses. define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@variable_gep_index ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]], i64 [[VARIABLE_INDEX:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -285,7 +285,7 @@ define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { define void @multi_index_gep(<4 x i8>* %ptr) { ; FIXME: %ptr should be dereferenceable(4) -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@multi_index_gep ; CHECK-SAME: (<4 x i8>* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -298,7 +298,7 @@ define void @multi_index_gep(<4 x i8>* %ptr) { ; Could round weird bitwidths down? 
define void @not_byte_multiple(i9* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_byte_multiple ; CHECK-SAME: (i9* nocapture nofree nonnull readnone align 2 dereferenceable(2) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -311,7 +311,7 @@ define void @not_byte_multiple(i9* %ptr) { ; Missing direct access from the pointer. define void @no_pointer_deref(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@no_pointer_deref ; CHECK-SAME: (i16* nocapture nofree readnone align 2 [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -326,7 +326,7 @@ define void @no_pointer_deref(i16* %ptr) { ; Out-of-order is ok, but missing access concludes dereferenceable range. define void @non_consecutive(i32* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@non_consecutive ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -343,7 +343,7 @@ define void @non_consecutive(i32* %ptr) { ; Improve on existing dereferenceable attribute. 
define void @more_bytes(i32* dereferenceable(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -362,7 +362,7 @@ define void @more_bytes(i32* dereferenceable(8) %ptr) { ; Improve on existing dereferenceable_or_null attribute. define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes_and_not_null ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -381,7 +381,7 @@ define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { ; But don't pessimize existing dereferenceable attribute. 
define void @better_bytes(i32* dereferenceable(100) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@better_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(100) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -398,7 +398,7 @@ define void @better_bytes(i32* dereferenceable(100) %ptr) { } define void @bitcast(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -412,7 +412,7 @@ define void @bitcast(i32* %arg) { } define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast_different_sizes ; CHECK-SAME: (double* nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG1:%.*]], i8* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[ARG2:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -434,7 +434,7 @@ define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { } define void @negative_offset(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@negative_offset ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -448,7 +448,7 @@ define void @negative_offset(i32* %arg) { } define void @stores(i32* %arg) { -; CHECK: Function Attrs: 
argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@stores ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -467,7 +467,7 @@ define void @stores(i32* %arg) { } define void @load_store(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@load_store ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -484,7 +484,7 @@ define void @load_store(i32* %arg) { } define void @different_size1(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double* @@ -499,7 +499,7 @@ define void @different_size1(i32* %arg) { } define void @different_size2(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: store i32 0, i32* [[ARG]], align 8 @@ -532,7 +532,7 @@ define void @different_size2(i32* %arg) { ; ; ATTRIBUTOR_CGSCC_NPM-LABEL: define i32 
@require_cfg_analysis(i32 %c, i32* {{.*}} dereferenceable(4) %p) define i32 @require_cfg_analysis(i32 %c, i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@require_cfg_analysis ; CHECK-SAME: (i32 [[C:%.*]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0 @@ -584,9 +584,9 @@ end: ret i32 1 } ;. -; CHECK: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CHECK: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR3]] = { argmemonly nofree norecurse nounwind willreturn } -; CHECK: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ;. 
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 270797ee9d574..d1240f1449944 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -656,9 +656,9 @@ define void @test16d(i8 %v, i8** %P) { ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nofree nounwind } ; CHECK: attributes #[[ATTR4]] = { noreturn } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { allockind("free") } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR7:[0-9]+]] = { allockind("alloc,uninitialized,aligned") allocsize(1) } ; CHECK: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) } -; CHECK: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CHECK: attributes #[[ATTR10]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll index 04295912d589a..7f2e16d878bbd 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll @@ -662,7 +662,7 @@ not_entry: ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nosync willreturn } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind } ; CHECK: attributes #[[ATTR3]] = { noreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR5]] = { nounwind } ; CHECK: attributes #[[ATTR6]] = { nosync nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll index f55c00ed9258f..b7cc29822560f 100644 --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@visible ; TUNIT-SAME: (i32* noalias nocapture nofree readonly [[A:%.*]], i32* noalias nocapture nofree readonly align 4 [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -12,12 +12,12 @@ define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 { ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@visible ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]] -; CGSCC-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree noundef nonnull readonly align 4 
dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CGSCC-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; CGSCC-NEXT: ret i32 [[ADD]] ; @@ -29,7 +29,7 @@ entry: } define private i32 @noalias_args(i32* %A, i32* %B) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@noalias_args ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -40,14 +40,14 @@ define private i32 @noalias_args(i32* %A, i32* %B) #0 { ; TUNIT-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]] ; TUNIT-NEXT: ret i32 [[ADD2]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CGSCC-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[B]], align 4 ; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]] ; CGSCC-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]] ; CGSCC-NEXT: ret i32 [[ADD2]] ; @@ -62,7 +62,7 @@ entry: define internal i32 @noalias_args_argmem(i32* %A, i32* %B) #1 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -71,7 +71,7 @@ define internal i32 @noalias_args_argmem(i32* %A, i32* %B) #1 { ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -88,7 +88,7 @@ entry: } define dso_local i32 @visible_local(i32* %A) #0 { -; TUNIT: Function 
Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@visible_local ; TUNIT-SAME: (i32* nocapture nofree readonly [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -99,13 +99,13 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@visible_local ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 5, i32* [[B]], align 4 -; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CGSCC-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; CGSCC-NEXT: ret i32 [[ADD]] @@ -120,7 +120,7 @@ entry: } define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { -; CGSCC: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline 
norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_ro ; CGSCC-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[B_PRIV:%.*]] = alloca i32, align 4 @@ -139,13 +139,13 @@ define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { } define i32 @visible_local_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@visible_local_2 ; TUNIT-SAME: () #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 10 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@visible_local_2 ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32 noundef 5, i32 noundef 5) #[[ATTR6:[0-9]+]] @@ -158,13 +158,13 @@ define i32 @visible_local_2() { } define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem_rn ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 ; TUNIT-NEXT: ret i32 [[T0]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_rn ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR4:[0-9]+]] { ; 
CGSCC-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 @@ -177,7 +177,7 @@ define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { } define i32 @visible_local_3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@visible_local_3 ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -185,7 +185,7 @@ define i32 @visible_local_3() { ; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@visible_local_3 ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -202,18 +202,18 @@ define i32 @visible_local_3() { attributes #0 = { noinline nounwind uwtable willreturn } attributes #1 = { argmemonly noinline nounwind uwtable willreturn} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind readonly } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree noinline nosync nounwind readonly willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree noinline nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR5]] = { readonly } -; CGSCC: attributes #[[ATTR6]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree noinline norecurse nosync nounwind 
willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR5]] = { memory(read) } +; CGSCC: attributes #[[ATTR6]] = { willreturn } ; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll index 5574e1ef07cc5..80c2ac7c5eca1 100644 --- a/llvm/test/Transforms/Attributor/internalize.ll +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -135,7 +135,7 @@ define void @unused_arg_caller() { ; CHECK_DISABLED-NEXT: call void @unused_arg(i8 noundef 0) ; CHECK_DISABLED-NEXT: ret void ; -; CHECK_ENABLED: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK_ENABLED: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK_ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller ; CHECK_ENABLED-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK_ENABLED-NEXT: unreachable @@ -164,7 +164,7 @@ define linkonce_odr hidden void @__clang_call_terminate() { ;. ; CHECK_DISABLED: attributes #[[ATTR0]] = { norecurse } ;. -; CHECK_ENABLED: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK_ENABLED: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CHECK_ENABLED: attributes #[[ATTR1]] = { norecurse } -; CHECK_ENABLED: attributes #[[ATTR2:[0-9]+]] = { nounwind readnone } +; CHECK_ENABLED: attributes #[[ATTR2:[0-9]+]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index cad70b04e7943..87b8519c8551f 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -37,7 +37,7 @@ declare i32 @bar() nosync readnone ; CGSCC: @[[P:[a-zA-Z0-9_$"\\.-]+]] = global i8 0 ;. 
define internal i32 @dead_internal_func(i32 %0) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@dead_internal_func ; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: br label [[TMP2:%.*]] @@ -68,13 +68,13 @@ define internal i32 @dead_internal_func(i32 %0) { } define i32 @volatile_load(i32*) norecurse nounwind uwtable { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@volatile_load ; TUNIT-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP0]], align 4 ; TUNIT-NEXT: ret i32 [[TMP2]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@volatile_load ; CGSCC-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR7:[0-9]+]] { ; CGSCC-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP0]], align 4 @@ -85,7 +85,7 @@ define i32 @volatile_load(i32*) norecurse nounwind uwtable { } define internal i32 @internal_load(i32*) norecurse nounwind uwtable { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@internal_load ; CGSCC-SAME: () #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: ret i32 undef @@ -498,7 +498,7 @@ cleanup: ; FIXME: Should be able to detect undefined behavior. 
define void @ub(i32* %0) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@ub ; TUNIT-SAME: (i32* nocapture nofree writeonly [[TMP0:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: [[POISON:%.*]] = sub nuw i32 0, 1 @@ -507,7 +507,7 @@ define void @ub(i32* %0) { ; TUNIT-NEXT: store i32 0, i32* [[POISON_YET_AGAIN]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@ub ; CGSCC-SAME: (i32* nocapture nofree writeonly [[TMP0:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: [[POISON:%.*]] = sub nuw i32 0, 1 @@ -524,7 +524,7 @@ define void @ub(i32* %0) { } define void @inf_loop() #0 { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@inf_loop ; TUNIT-SAME: () #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -532,7 +532,7 @@ define void @inf_loop() #0 { ; TUNIT: while.body: ; TUNIT-NEXT: br label [[WHILE_BODY]] ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@inf_loop ; CGSCC-SAME: () #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -551,7 +551,7 @@ while.body: ; preds = %entry, %while.body ; FIXME: Detect infloops, and mark affected blocks dead. 
define i32 @test5(i32, i32) #0 { -; CHECK: Function Attrs: nosync readnone +; CHECK: Function Attrs: nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@test5 ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] @@ -588,13 +588,13 @@ cond.end: ; preds = %cond.if, %con } define void @rec() #0 { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@rec ; TUNIT-SAME: () #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@rec ; CGSCC-SAME: () #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2228,7 +2228,7 @@ define i32 @switch_default_caller() { } define internal i32 @switch_default_dead(i64 %i) nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@switch_default_dead ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: entry: @@ -2255,12 +2255,12 @@ return: } define i32 @switch_default_dead_caller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@switch_default_dead_caller ; TUNIT-SAME: () #[[ATTR11:[0-9]+]] { ; TUNIT-NEXT: ret i32 123 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@switch_default_dead_caller ; CGSCC-SAME: () #[[ATTR11]] { ; CGSCC-NEXT: [[CALL2:%.*]] = tail call noundef i32 @switch_default_dead() #[[ATTR16:[0-9]+]] @@ -2377,7 +2377,7 @@ declare void @use_i32p(i32*) 
; Allow blockaddress users define internal void @dead_with_blockaddress_users(i32* nocapture %pc) nounwind readonly { -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@dead_with_blockaddress_users ; CGSCC-SAME: (i32* nocapture [[PC:%.*]]) #[[ATTR13:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2422,59 +2422,33 @@ indirectgoto: ; preds = %lab0, %entry @e = global %struct.a* null define i32 @main() { -; TUNIT-LABEL: define {{[^@]+}}@main() { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[F:%.*]] = alloca i32, align 4 -; TUNIT-NEXT: br label [[FOR_COND_0:%.*]] -; TUNIT: for.cond.0: -; TUNIT-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ] -; TUNIT-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100 -; TUNIT-NEXT: br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]] -; TUNIT: for.body.0: -; TUNIT-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 -; TUNIT-NEXT: br label [[FOR_COND_0]] -; TUNIT: for.end.0: -; TUNIT-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 noundef 8) -; TUNIT-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 -; TUNIT-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** -; TUNIT-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 -; TUNIT-NEXT: br label [[FOR_COND_1:%.*]] -; TUNIT: for.cond.1: -; TUNIT-NEXT: [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ] -; TUNIT-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100 -; TUNIT-NEXT: br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]] -; TUNIT: for.body.1: -; TUNIT-NEXT: [[CALL4:%.*]] = call i32 (i32*, ...) 
bitcast (i32 (i32)* @h to i32 (i32*, ...)*)(i32* nonnull [[F]]) -; TUNIT-NEXT: [[INC6]] = add nuw nsw i32 [[G_1]], 1 -; TUNIT-NEXT: br label [[FOR_COND_1]] -; TUNIT: for.end.1: -; TUNIT-NEXT: ret i32 0 -; -; CGSCC-LABEL: define {{[^@]+}}@main() { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: br label [[FOR_COND_0:%.*]] -; CGSCC: for.cond.0: -; CGSCC-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ] -; CGSCC-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100 -; CGSCC-NEXT: br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]] -; CGSCC: for.body.0: -; CGSCC-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 -; CGSCC-NEXT: br label [[FOR_COND_0]] -; CGSCC: for.end.0: -; CGSCC-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 noundef 8) -; CGSCC-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 -; CGSCC-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** -; CGSCC-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 -; CGSCC-NEXT: br label [[FOR_COND_1:%.*]] -; CGSCC: for.cond.1: -; CGSCC-NEXT: [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ] -; CGSCC-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100 -; CGSCC-NEXT: br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]] -; CGSCC: for.body.1: -; CGSCC-NEXT: [[INC6]] = add nuw nsw i32 [[G_1]], 1 -; CGSCC-NEXT: br label [[FOR_COND_1]] -; CGSCC: for.end.1: -; CGSCC-NEXT: ret i32 0 +; CHECK-LABEL: define {{[^@]+}}@main() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[FOR_COND_0:%.*]] +; CHECK: for.cond.0: +; CHECK-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ] +; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100 +; CHECK-NEXT: br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]] +; CHECK: for.body.0: +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 +; CHECK-NEXT: br label [[FOR_COND_0]] +; CHECK: for.end.0: +; CHECK-NEXT: [[CALL:%.*]] = 
call i8* @malloc(i64 noundef 8) +; CHECK-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 +; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** +; CHECK-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 +; CHECK-NEXT: br label [[FOR_COND_1:%.*]] +; CHECK: for.cond.1: +; CHECK-NEXT: [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ] +; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100 +; CHECK-NEXT: br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]] +; CHECK: for.body.1: +; CHECK-NEXT: [[CALL4:%.*]] = call i32 (i32*, ...) bitcast (i32 (i32)* @h to i32 (i32*, ...)*)(i32* nonnull [[F]]) +; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[G_1]], 1 +; CHECK-NEXT: br label [[FOR_COND_1]] +; CHECK: for.end.1: +; CHECK-NEXT: ret i32 0 ; entry: %f = alloca i32 @@ -2513,12 +2487,12 @@ for.end.1: declare noalias i8* @malloc(i64) define i32 @h(i32 %i) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@h ; TUNIT-SAME: (i32 [[I:%.*]]) #[[ATTR11]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@h ; CGSCC-SAME: (i32 [[I:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: ret i32 0 @@ -2532,7 +2506,7 @@ define i32 @h(i32 %i) { @p = global i8 0 define void @bad_gep() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@bad_gep ; TUNIT-SAME: () #[[ATTR11]] { ; TUNIT-NEXT: entry: @@ -2550,13 +2524,13 @@ define void @bad_gep() { ; TUNIT-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR14]] ; 
TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bad_gep ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[N:%.*]] = alloca i8, align 1 ; CGSCC-NEXT: [[M:%.*]] = alloca i8, align 1 -; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR17:[0-9]+]] +; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR16]] ; CGSCC-NEXT: br label [[EXIT:%.*]] ; CGSCC: while.body: ; CGSCC-NEXT: unreachable @@ -2565,7 +2539,7 @@ define void @bad_gep() { ; CGSCC: if.end: ; CGSCC-NEXT: unreachable ; CGSCC: exit: -; CGSCC-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR17]] +; CGSCC-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR16]] ; CGSCC-NEXT: ret void ; entry: @@ -2594,7 +2568,7 @@ exit: } define i8 @edge_vs_block_liveness() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@edge_vs_block_liveness ; TUNIT-SAME: () #[[ATTR11]] { ; TUNIT-NEXT: entry: @@ -2605,7 +2579,7 @@ define i8 @edge_vs_block_liveness() { ; TUNIT-NEXT: [[PHI:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ 1, [[B1]] ] ; TUNIT-NEXT: ret i8 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@edge_vs_block_liveness ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: entry: @@ -2632,37 +2606,36 @@ declare void 
@llvm.lifetime.start.p0i8(i64 %0, i8* %1) declare void @llvm.lifetime.end.p0i8(i64 %0, i8* %1) ;. ; TUNIT: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { readnone } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { memory(none) } ; TUNIT: attributes #[[ATTR2]] = { nounwind } ; TUNIT: attributes #[[ATTR3]] = { noreturn nounwind } ; TUNIT: attributes #[[ATTR4]] = { noreturn } -; TUNIT: attributes #[[ATTR5]] = { nosync readnone } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nofree norecurse nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind readnone } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR5]] = { nosync memory(none) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR12:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR12:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR13]] = { nounwind willreturn } ; TUNIT: attributes #[[ATTR14]] = { willreturn } ;. 
; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind } -; CGSCC: attributes #[[ATTR1:[0-9]+]] = { readnone } +; CGSCC: attributes #[[ATTR1:[0-9]+]] = { memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nounwind } ; CGSCC: attributes #[[ATTR3]] = { noreturn nounwind } ; CGSCC: attributes #[[ATTR4]] = { noreturn } -; CGSCC: attributes #[[ATTR5]] = { nosync readnone } -; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR7]] = { argmemonly nofree norecurse nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree norecurse noreturn nosync nounwind readnone } -; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR5]] = { nosync memory(none) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR13]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR14:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR13]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR14:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } 
; CGSCC: attributes #[[ATTR15]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR16]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR17]] = { willreturn } +; CGSCC: attributes #[[ATTR16]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/liveness_chains.ll b/llvm/test/Transforms/Attributor/liveness_chains.ll index 0f405fa2f7ef4..b7063a02350b0 100644 --- a/llvm/test/Transforms/Attributor/liveness_chains.ll +++ b/llvm/test/Transforms/Attributor/liveness_chains.ll @@ -7,7 +7,7 @@ declare i32 @source() nounwind readonly define i32 @chain_dead(i32 %arg) { -; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@chain_dead ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i32 0 @@ -27,10 +27,10 @@ define i32 @chain_dead(i32 %arg) { } define i32 @chain_alive(i32 %arg) { -; CHECK: Function Attrs: nounwind readonly +; CHECK: Function Attrs: nounwind memory(read) ; CHECK-LABEL: define {{[^@]+}}@chain_alive ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[INIT:%.*]] = call i32 @source() #[[ATTR0]] +; CHECK-NEXT: [[INIT:%.*]] = call i32 @source() #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[V0:%.*]] = add i32 [[ARG]], [[INIT]] ; CHECK-NEXT: [[V1:%.*]] = add i32 [[INIT]], [[V0]] ; CHECK-NEXT: [[V2:%.*]] = add i32 [[V0]], [[V1]] @@ -57,6 +57,7 @@ define i32 @chain_alive(i32 %arg) { ret i32 %v9 } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind readonly } -; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nounwind memory(read) } +; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2]] = { nounwind } ;. 
diff --git a/llvm/test/Transforms/Attributor/lowerheap.ll b/llvm/test/Transforms/Attributor/lowerheap.ll index d3575911415d8..89790cb678b43 100644 --- a/llvm/test/Transforms/Attributor/lowerheap.ll +++ b/llvm/test/Transforms/Attributor/lowerheap.ll @@ -47,6 +47,6 @@ attributes #0 = { nounwind willreturn } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CHECK: attributes #[[ATTR5]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll index fe2a6e0f64c52..619a18eba47f5 100644 --- a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll +++ b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define i8 @test1(i32 %a, i32 %length) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[LENGTH:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -37,7 +37,7 @@ exit: } define i8 @test2(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0]] { ; 
CHECK-NEXT: entry: @@ -185,8 +185,8 @@ declare void @llvm.assume(i1) nounwind declare void @dummy(i1) nounwind declare void @llvm.experimental.guard(i1, ...) ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK: attributes #[[ATTR2]] = { nounwind } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/lvi-for-ashr.ll b/llvm/test/Transforms/Attributor/lvi-for-ashr.ll index ea5618acd201d..23dcc47b943d6 100644 --- a/llvm/test/Transforms/Attributor/lvi-for-ashr.ll +++ b/llvm/test/Transforms/Attributor/lvi-for-ashr.ll @@ -5,7 +5,7 @@ ; FIXME: DOT should be replaced with 3 define i32 @test-ashr(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test-ashr ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: chk65: @@ -49,5 +49,5 @@ return: ret i32 %retval } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll index 104aa858e183a..ae1084f8abcb8 100644 --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -11,7 +11,7 @@ declare noalias i8* @malloc(i64) inaccessiblememonly ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external dso_local global i32, align 4 ;. 
define dso_local i8* @internal_only(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -26,7 +26,7 @@ entry: } define dso_local i8* @internal_only_rec(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only_rec ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -66,7 +66,7 @@ return: ; preds = %if.end, %if.then } define dso_local i8* @internal_only_rec_static_helper(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_helper ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -79,7 +79,7 @@ entry: } define internal i8* @internal_only_rec_static(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -173,7 +173,7 @@ return: ; preds = %if.end, %if.then } define dso_local i8* @internal_argmem_only_read(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_read ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -190,7 +190,7 @@ entry: } define dso_local i8* @internal_argmem_only_write(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_write ; 
CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -205,14 +205,14 @@ entry: } define dso_local i8* @internal_argmem_only_rec(i32* %arg) { -; TUNIT: Function Attrs: inaccessiblemem_or_argmemonly +; TUNIT: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_argmem_only_rec ; TUNIT-SAME: (i32* nocapture nofree [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree align 4 [[ARG]]) ; TUNIT-NEXT: ret i8* [[CALL]] ; -; CGSCC: Function Attrs: inaccessiblemem_or_argmemonly +; CGSCC: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_argmem_only_rec ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -225,7 +225,7 @@ entry: } define internal i8* @internal_argmem_only_rec_1(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_rec_1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -281,7 +281,7 @@ return: ; preds = %if.end3, %if.then2, } define internal i8* @internal_argmem_only_rec_2(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_rec_2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -303,7 +303,7 @@ declare i8* @inaccesible_argmem_only_decl(i8* %arg) inaccessiblemem_or_argmemonl declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) 
nounwind argmemonly willreturn define void @callerA1(i8* %arg) { -; CHECK: Function Attrs: argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@callerA1 ; CHECK-SAME: (i8* [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* [[ARG]]) @@ -313,7 +313,7 @@ define void @callerA1(i8* %arg) { ret void } define void @callerA2(i8* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@callerA2 ; CHECK-SAME: (i8* [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* [[ARG]]) @@ -323,7 +323,7 @@ define void @callerA2(i8* %arg) { ret void } define void @callerB1() { -; CHECK: Function Attrs: readnone +; CHECK: Function Attrs: memory(none) ; CHECK-LABEL: define {{[^@]+}}@callerB1 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 @@ -335,7 +335,7 @@ define void @callerB1() { ret void } define void @callerB2() { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@callerB2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 @@ -388,7 +388,7 @@ define void @callerD2() { } define void @callerE(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@callerE ; CHECK-SAME: (i8* nocapture nofree readnone [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: ret void @@ -399,7 +399,7 @@ define void @callerE(i8* %arg) { define void @write_global() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@write_global ; 
CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* @G, align 4 @@ -409,7 +409,7 @@ define void @write_global() { ret void } define void @write_global_via_arg(i32* %GPtr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@write_global_via_arg ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[GPTR:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* [[GPTR]], align 4 @@ -419,7 +419,7 @@ define void @write_global_via_arg(i32* %GPtr) { ret void } define internal void @write_global_via_arg_internal(i32* %GPtr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@write_global_via_arg_internal ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: store i32 0, i32* @G, align 4 @@ -430,13 +430,13 @@ define internal void @write_global_via_arg_internal(i32* %GPtr) { } define void @writeonly_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@writeonly_global ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @write_global() #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@writeonly_global ; CGSCC-SAME: () #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: call void @write_global() #[[ATTR11:[0-9]+]] @@ -446,13 +446,13 @@ define void @writeonly_global() { ret void } define void @writeonly_global_via_arg() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: 
nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@writeonly_global_via_arg ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@writeonly_global_via_arg ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) #[[ATTR11]] @@ -464,13 +464,13 @@ define void @writeonly_global_via_arg() { define void @writeonly_global_via_arg_internal() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@writeonly_global_via_arg_internal ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @write_global_via_arg_internal() #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@writeonly_global_via_arg_internal ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: call void @write_global_via_arg_internal() #[[ATTR11]] @@ -481,7 +481,7 @@ define void @writeonly_global_via_arg_internal() { } define i8 @recursive_not_readnone(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_not_readnone ; TUNIT-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -493,7 +493,7 @@ define i8 @recursive_not_readnone(i8* %ptr, i1 %c) { ; 
TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_not_readnone ; CGSCC-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -517,7 +517,7 @@ f: } define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_not_readnone_internal ; TUNIT-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -529,7 +529,7 @@ define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { ; TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_not_readnone_internal ; CGSCC-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -553,14 +553,14 @@ f: } define i8 @readnone_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@readnone_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) #[[ATTR11]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function 
Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@readnone_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -573,7 +573,7 @@ define i8 @readnone_caller(i1 %c) { } define internal i8 @recursive_readnone_internal2(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_readnone_internal2 ; TUNIT-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -585,7 +585,7 @@ define internal i8 @recursive_readnone_internal2(i8* %ptr, i1 %c) { ; TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_readnone_internal2 ; CGSCC-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -609,13 +609,13 @@ f: } define i8 @readnone_caller2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@readnone_caller2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[R:%.*]] = call i8 @recursive_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR11]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@readnone_caller2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[R:%.*]] = call i8 @recursive_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR13]] @@ -626,7 +626,7 @@ define i8 
@readnone_caller2(i1 %c) { } define internal i8 @recursive_not_readnone_internal3(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_not_readnone_internal3 ; TUNIT-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -638,7 +638,7 @@ define internal i8 @recursive_not_readnone_internal3(i8* %ptr, i1 %c) { ; TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_not_readnone_internal3 ; CGSCC-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -662,14 +662,14 @@ f: } define i8 @readnone_caller3(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@readnone_caller3 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal3(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 [[C]]) #[[ATTR11]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@readnone_caller3 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -682,7 +682,7 @@ define i8 @readnone_caller3(i1 %c) { } define internal void @argmemonly_before_ipconstprop(i32* %p) argmemonly { -; CHECK: Function 
Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@argmemonly_before_ipconstprop ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: store i32 0, i32* @G, align 4 @@ -693,13 +693,13 @@ define internal void @argmemonly_before_ipconstprop(i32* %p) argmemonly { } define void @argmemonky_caller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@argmemonky_caller ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @argmemonly_before_ipconstprop() #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@argmemonky_caller ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: call void @argmemonly_before_ipconstprop() #[[ATTR11]] @@ -709,31 +709,31 @@ define void @argmemonky_caller() { ret void } ;. 
-; TUNIT: attributes #[[ATTR0]] = { inaccessiblememonly } -; TUNIT: attributes #[[ATTR1]] = { inaccessiblemem_or_argmemonly } -; TUNIT: attributes #[[ATTR2]] = { readnone } -; TUNIT: attributes #[[ATTR3]] = { argmemonly } -; TUNIT: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nofree nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind writeonly } +; TUNIT: attributes #[[ATTR0]] = { memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR2]] = { memory(none) } +; TUNIT: attributes #[[ATTR3]] = { memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR8]] = { nofree nosync nounwind memory(argmem: write) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { inaccessiblememonly } -; CGSCC: attributes #[[ATTR1]] = { inaccessiblemem_or_argmemonly } -; CGSCC: attributes #[[ATTR2]] = { readnone } -; CGSCC: attributes #[[ATTR3]] = { argmemonly } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR8]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR13]] = { nounwind writeonly } +; CGSCC: attributes #[[ATTR0]] = { memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR2]] = { memory(none) } +; CGSCC: attributes #[[ATTR3]] = { memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR8]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes 
#[[ATTR11]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR13]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index c6a09901fb946..44e7f41abb58a 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -67,7 +67,7 @@ define void @external(void (i8*)* %fp) { ; CGSCC-SAME: (void (i8*)* [[FP:%.*]]) { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: call void @foo(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) #[[ATTR1]] +; CGSCC-NEXT: call void @foo(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: call void @callback1(void (i32*)* noundef nonnull @foo) ; CGSCC-NEXT: call void @callback2(void (i8*)* noundef bitcast (void (i32*)* @foo to void (i8*)*)) ; CGSCC-NEXT: call void @callback2(void (i8*)* [[FP]]) @@ -93,7 +93,7 @@ entry: define internal void @foo(i32* %a) { ; -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -108,9 +108,10 @@ entry: declare void @callback1(void (i32*)*) declare void @callback2(void (i8*)*) ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/misc_crash.ll b/llvm/test/Transforms/Attributor/misc_crash.ll index ab4fceafe39fb..b28bf102e6bed 100644 --- a/llvm/test/Transforms/Attributor/misc_crash.ll +++ b/llvm/test/Transforms/Attributor/misc_crash.ll @@ -9,7 +9,7 @@ ; CHECK: @[[VAR2:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0 ;. define i32 addrspace(1)* @foo(i32 addrspace(4)* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32 addrspace(4)* nofree readnone [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -22,7 +22,7 @@ entry: } define i32* @func1() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@func1 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32* getelementptr inbounds ([1 x i32], [1 x i32]* @var1, i32 0, i32 0) @@ -37,7 +37,7 @@ define internal i32* @func1a([1 x i32]* %arg) { } define internal void @func2a(i32* %0) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@func2a ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* @var2, align 4 @@ -118,7 +118,7 @@ define i16 @foo3() { ret i16 %call 
} define internal i16 @bar3(i16* %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar3 ; CHECK-SAME: (i16* nocapture nofree readnone [[P1:%.*]], i16 returned [[P2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i16 [[P2]] @@ -130,7 +130,7 @@ define internal i16 @bar3(i16* %p1, i16 %p2) { ; CHECK-SAME: (i8*) declare void @func6(i8*) ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) } ; CHECK: attributes #[[ATTR2]] = { norecurse } ;. diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 84140995e4c8a..dcb93fe46cc33 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -43,7 +43,7 @@ define i8* @return_noalias(){ } define void @nocapture(i8* %a){ -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocapture ; CHECK-SAME: (i8* nocapture nofree readnone [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret void @@ -159,7 +159,7 @@ declare i8* @baz(...) nounwind uwtable ; Returning global pointer. Should not be noalias. 
define i8** @getter() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@getter ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i8** @G @@ -169,12 +169,12 @@ define i8** @getter() { ; Returning global pointer. Should not be noalias. define i8** @calle1(){ -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@calle1 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i8** @G ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@calle1 ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[TMP1:%.*]] = call noundef nonnull align 8 dereferenceable(8) i8** @getter() #[[ATTR11:[0-9]+]] @@ -520,7 +520,7 @@ define void @test13_use_alias(){ ; TEST 14 i2p casts define internal i32 @p2i(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@p2i ; CHECK-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 @@ -531,22 +531,22 @@ define internal i32 @p2i(i32* %arg) { } define i32 @i2p(i32* %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@i2p ; TUNIT-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { -; TUNIT-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: 
[[I2P:%.*]] = inttoptr i32 [[C]] to i8* ; TUNIT-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; TUNIT-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) #[[ATTR10:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@i2p ; CGSCC-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) #[[ATTR11]] ; CGSCC-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* ; CGSCC-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[BC]]) #[[ATTR12:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[BC]]) #[[ATTR11]] ; CGSCC-NEXT: ret i32 [[CALL]] ; %c = call i32 @p2i(i32* %arg) @@ -556,13 +556,13 @@ define i32 @i2p(i32* %arg) { ret i32 %call } define internal i32 @ret(i32* %arg) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@ret ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 ; TUNIT-NEXT: ret i32 [[L]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@ret ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) 
#[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 @@ -599,7 +599,7 @@ define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 ; CGSCC-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture nofree noundef nonnull align 8 dereferenceable(240) [[TMP0]]) #[[ATTR13:[0-9]+]] +; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture nofree noundef nonnull align 8 dereferenceable(240) [[TMP0]]) #[[ATTR12:[0-9]+]] ; CGSCC-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) ; CGSCC-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) ; CGSCC-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) @@ -652,13 +652,13 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) @alias_of_p = external global i32* define void @make_alias(i32* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@make_alias ; TUNIT-SAME: (i32* nofree writeonly [[P:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@make_alias ; CGSCC-SAME: (i32* nofree writeonly [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 @@ -669,13 +669,13 @@ define void 
@make_alias(i32* %p) { } define void @only_store(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@only_store ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: store i32 0, i32* [[P]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@only_store ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: store i32 0, i32* [[P]], align 4 @@ -686,28 +686,28 @@ define void @only_store(i32* %p) { } define void @test15_caller(i32* noalias %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test15_caller ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree 
nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test15_caller ; CGSCC-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13:[0-9]+]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: -; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; %tobool = icmp eq i32 %c, 0 @@ -743,32 +743,32 @@ if.end: ; Therefore, only one of the two conditions of if statementes will be fulfilled. define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test16_sub ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: ; TUNIT-NEXT: [[TOBOOL1:%.*]] = 
icmp eq i32 [[C2]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] ; TUNIT: if.then2: -; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[IF_END3]] ; TUNIT: if.end3: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test16_sub ; CGSCC-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] -; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: ; CGSCC-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 @@ -800,16 +800,16 @@ if.end3: } define void @test16_caller(i32* %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test16_caller ; TUNIT-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @test16_sub(i32* noalias nofree 
writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test16_caller ; CGSCC-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; tail call void @test16_sub(i32* %p, i32 %c, i32 %c) @@ -836,32 +836,32 @@ define void @test16_caller(i32* %p, i32 %c) { ; } define void @test17_caller(i32* noalias %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test17_caller ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; TUNIT: l1: -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[L3:%.*]] ; TUNIT: l2: -; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[L3]] ; TUNIT: l3: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test17_caller ; CGSCC-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: entry: ; 
CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; CGSCC: l1: -; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[L3:%.*]] ; CGSCC: l2: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[L3]] ; CGSCC: l3: ; CGSCC-NEXT: ret void @@ -894,12 +894,12 @@ l3: ; } define void @noreturn() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@noreturn -; TUNIT-SAME: () #[[ATTR9]] { +; TUNIT-SAME: () #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@noreturn ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret void @@ -909,30 +909,30 @@ define void @noreturn() { } define void @test18_caller(i32* noalias %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test18_caller ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; TUNIT: l1: -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[L2]] ; 
TUNIT: l2: -; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test18_caller ; CGSCC-SAME: (i32* noalias nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; CGSCC: l1: -; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[L2]] ; CGSCC: l2: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; entry: @@ -949,33 +949,32 @@ l2: ret void } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR1]] = { nounwind uwtable } ; TUNIT: attributes #[[ATTR2]] = { nounwind } ; TUNIT: attributes #[[ATTR3]] = { nounwind ssp uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR6:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR11]] = { willreturn } -; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { willreturn memory(readwrite) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR1]] = { nounwind uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR3]] = { nounwind } ; CGSCC: attributes #[[ATTR4]] = { nounwind ssp uwtable } -; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR7:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR11]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR12]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR13]] = { willreturn } -; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR11]] = { willreturn } +; CGSCC: attributes #[[ATTR12]] = { willreturn 
memory(readwrite) } +; CGSCC: attributes #[[ATTR13]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn memory(write) } ;. diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 49c17d61575fc..2e0f702792fd0 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -11,7 +11,7 @@ ; CHECK: @[[G3:[a-zA-Z0-9_$"\\.-]+]] = global i8* null ;. define i32* @c1(i32* %q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c1 ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32* [[Q]] @@ -21,7 +21,7 @@ define i32* @c1(i32* %q) { ; It would also be acceptable to mark %q as readnone. Update @c3 too. define void @c2(i32* %q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@c2 ; CHECK-SAME: (i32* nofree writeonly [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: store i32* [[Q]], i32** @g, align 8 @@ -32,16 +32,16 @@ define void @c2(i32* %q) { } define void @c3(i32* %q) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@c3 ; TUNIT-SAME: (i32* nofree writeonly [[Q:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR14:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@c3 ; CGSCC-SAME: (i32* nofree writeonly [[Q:%.*]]) #[[ATTR2:[0-9]+]] { -; 
CGSCC-NEXT: call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR17:[0-9]+]] +; CGSCC-NEXT: call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR14:[0-9]+]] ; CGSCC-NEXT: ret void ; call void @c2(i32* %q) @@ -49,7 +49,7 @@ define void @c3(i32* %q) { } define i1 @c4(i32* %q, i32 %bitno) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c4 ; CHECK-SAME: (i32* nofree readnone [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -73,7 +73,7 @@ l1: ; c4b is c4 but without the escaping part define i1 @c4b(i32* %q, i32 %bitno) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c4b ; CHECK-SAME: (i32* nocapture nofree readnone [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -98,7 +98,7 @@ l1: @lookup_table = global [2 x i1] [ i1 0, i1 1 ] define i1 @c5(i32* %q, i32 %bitno) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@c5 ; TUNIT-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -108,7 +108,7 @@ define i1 @c5(i32* %q, i32 %bitno) { ; TUNIT-NEXT: [[VAL:%.*]] = load i1, i1* [[LOOKUP]], align 1 ; TUNIT-NEXT: ret i1 [[VAL]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@c5 ; CGSCC-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -130,10 
+130,10 @@ define i1 @c5(i32* %q, i32 %bitno) { declare void @throw_if_bit_set(i8*, i8) readonly define i1 @c6(i8* %q, i8 %bit) personality i32 (...)* @__gxx_personality_v0 { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@c6 ; TUNIT-SAME: (i8* readonly [[Q:%.*]], i8 [[BIT:%.*]]) #[[ATTR4:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { -; TUNIT-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) #[[ATTR3:[0-9]+]] +; TUNIT-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) ; TUNIT-NEXT: to label [[RET0:%.*]] unwind label [[RET1:%.*]] ; TUNIT: ret0: ; TUNIT-NEXT: ret i1 false @@ -142,10 +142,10 @@ define i1 @c6(i8* %q, i8 %bit) personality i32 (...)* @__gxx_personality_v0 { ; TUNIT-NEXT: cleanup ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@c6 ; CGSCC-SAME: (i8* readonly [[Q:%.*]], i8 [[BIT:%.*]]) #[[ATTR5:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { -; CGSCC-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) ; CGSCC-NEXT: to label [[RET0:%.*]] unwind label [[RET1:%.*]] ; CGSCC: ret0: ; CGSCC-NEXT: ret i1 false @@ -167,7 +167,7 @@ ret1: declare i32 @__gxx_personality_v0(...) 
define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@lookup_bit ; CHECK-SAME: (i32* nofree readnone [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -184,17 +184,17 @@ define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind { } define i1 @c7(i32* %q, i32 %bitno) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@c7 ; TUNIT-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR15:[0-9]+]] ; TUNIT-NEXT: [[VAL:%.*]] = load i1, i1* [[PTR]], align 1 ; TUNIT-NEXT: ret i1 [[VAL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@c7 ; CGSCC-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR6:[0-9]+]] { -; CGSCC-NEXT: [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR18:[0-9]+]] +; CGSCC-NEXT: [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR17:[0-9]+]] ; CGSCC-NEXT: [[VAL:%.*]] = load i1, i1* [[PTR]], align 1 ; CGSCC-NEXT: ret i1 [[VAL]] ; @@ -292,13 +292,13 @@ define void @nc2(i32* %p, i32* %q) { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@nc2 ; TUNIT-SAME: (i32* nocapture nofree [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR5]] { -; TUNIT-NEXT: [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree [[P]], i1 noundef false) #[[ATTR16:[0-9]+]] +; TUNIT-NEXT: 
[[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree [[P]], i1 noundef false) #[[ATTR14]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@nc2 ; CGSCC-SAME: (i32* nocapture nofree align 4 [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR8:[0-9]+]] { -; CGSCC-NEXT: [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree align 4 [[P]], i1 noundef false) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree align 4 [[P]], i1 noundef false) #[[ATTR14]] ; CGSCC-NEXT: ret void ; %1 = call i32 @nc1(i32* %q, i32* %p, i1 0) ; [#uses=0] @@ -320,16 +320,16 @@ define void @nc3(void ()* %p) { ; FIXME: readonly and nocapture missing on the pointer. declare void @external(i8* readonly) nounwind argmemonly define void @nc4(i8* %p) { -; TUNIT: Function Attrs: argmemonly nounwind +; TUNIT: Function Attrs: nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@nc4 ; TUNIT-SAME: (i8* [[P:%.*]]) #[[ATTR6:[0-9]+]] { -; TUNIT-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR17:[0-9]+]] +; TUNIT-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR16:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nounwind +; CGSCC: Function Attrs: nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@nc4 ; CGSCC-SAME: (i8* [[P:%.*]]) #[[ATTR9:[0-9]+]] { -; CGSCC-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR19:[0-9]+]] +; CGSCC-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR18:[0-9]+]] ; CGSCC-NEXT: ret void ; call void @external(i8* %p) @@ -349,17 +349,17 @@ define void @nc5(void (i8*)* %f, i8* %p) { ; It would be acceptable to add readnone to %y1_1 and %y1_2. 
define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test1_1 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X1_1:%.*]], i8* nocapture nofree readnone [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR7:[0-9]+]] { -; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR7]] +; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR17:[0-9]+]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test1_1 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X1_1:%.*]], i8* nocapture nofree readnone [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { -; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR19:[0-9]+]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -369,23 +369,23 @@ define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { } define i8* @test1_2(i8* %x1_2, i8* %y1_2, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test1_2 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X1_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: br 
i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR7]] +; TUNIT-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: br label [[F]] ; TUNIT: f: ; TUNIT-NEXT: ret i8* [[Y1_2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test1_2 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X1_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR10]] +; CGSCC-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: br label [[F]] ; CGSCC: f: @@ -401,17 +401,17 @@ f: } define void @test2(i8* %x2) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test2 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X2:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR7]] +; TUNIT-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test2 ; CGSCC-SAME: 
(i8* nocapture nofree readnone [[X2:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR10]] +; CGSCC-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -421,17 +421,17 @@ define void @test2(i8* %x2) { } define void @test3(i8* %x3, i8* %y3, i8* %z3) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test3 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X3:%.*]], i8* nocapture nofree readnone [[Y3:%.*]], i8* nocapture nofree readnone [[Z3:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR7]] +; TUNIT-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test3 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X3:%.*]], i8* nocapture nofree readnone [[Y3:%.*]], i8* nocapture nofree readnone [[Z3:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR10]] +; CGSCC-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -441,17 +441,17 @@ define void @test3(i8* %x3, i8* %y3, i8* %z3) { } define void @test4_1(i8* 
%x4_1, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test4_1 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR7]] +; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test4_1 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -461,23 +461,23 @@ define void @test4_1(i8* %x4_1, i1 %c) { } define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test4_2 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X4_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y4_2:%.*]], i8* nocapture 
nofree readnone [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR7]] +; TUNIT-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: br label [[F]] ; TUNIT: f: ; TUNIT-NEXT: ret i8* [[Y4_2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test4_2 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X4_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y4_2:%.*]], i8* nocapture nofree readnone [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR10]] +; CGSCC-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: br label [[F]] ; CGSCC: f: @@ -521,13 +521,13 @@ define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) { } define void @test_cmpxchg(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_cmpxchg ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: [[TMP1:%.*]] = cmpxchg i32* [[P]], i32 0, i32 1 acquire monotonic, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn 
memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_cmpxchg ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: [[TMP1:%.*]] = cmpxchg i32* [[P]], i32 0, i32 1 acquire monotonic, align 4 @@ -538,13 +538,13 @@ define void @test_cmpxchg(i32* %p) { } define void @test_cmpxchg_ptr(i32** %p, i32* %q) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_cmpxchg_ptr ; TUNIT-SAME: (i32** nocapture nofree noundef nonnull dereferenceable(8) [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[TMP1:%.*]] = cmpxchg i32** [[P]], i32* null, i32* [[Q]] acquire monotonic, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_cmpxchg_ptr ; CGSCC-SAME: (i32** nocapture nofree noundef nonnull dereferenceable(8) [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR11]] { ; CGSCC-NEXT: [[TMP1:%.*]] = cmpxchg i32** [[P]], i32* null, i32* [[Q]] acquire monotonic, align 8 @@ -555,13 +555,13 @@ define void @test_cmpxchg_ptr(i32** %p, i32* %q) { } define void @test_atomicrmw(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_atomicrmw ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[TMP1:%.*]] = atomicrmw add i32* [[P]], i32 1 seq_cst, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_atomicrmw ; 
CGSCC-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR11]] { ; CGSCC-NEXT: [[TMP1:%.*]] = atomicrmw add i32* [[P]], i32 1 seq_cst, align 4 @@ -572,7 +572,7 @@ define void @test_atomicrmw(i32* %p) { } define void @test_volatile(i32* %x) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_volatile ; TUNIT-SAME: (i32* nofree align 4 [[X:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: @@ -580,7 +580,7 @@ define void @test_volatile(i32* %x) { ; TUNIT-NEXT: store volatile i32 0, i32* [[GEP]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_volatile ; CGSCC-SAME: (i32* nofree align 4 [[X:%.*]]) #[[ATTR11]] { ; CGSCC-NEXT: entry: @@ -607,7 +607,7 @@ define void @nocaptureLaunder(i8* %p) { ; CGSCC-LABEL: define {{[^@]+}}@nocaptureLaunder ; CGSCC-SAME: (i8* nocapture nofree [[P:%.*]]) #[[ATTR7]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR20:[0-9]+]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8 42, i8* [[B]], align 1 ; CGSCC-NEXT: ret void ; @@ -629,7 +629,7 @@ define void @captureLaunder(i8* %p) { ; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@captureLaunder ; CGSCC-SAME: (i8* nofree [[P:%.*]]) #[[ATTR7]] { -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR20]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8* [[B]], i8** @g2, align 8 ; CGSCC-NEXT: ret void ; @@ -639,19 +639,19 @@ define void 
@captureLaunder(i8* %p) { } define void @nocaptureStrip(i8* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@nocaptureStrip ; TUNIT-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR19:[0-9]+]] +; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] ; TUNIT-NEXT: store i8 42, i8* [[B]], align 1 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@nocaptureStrip ; CGSCC-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8 42, i8* [[B]], align 1 ; CGSCC-NEXT: ret void ; @@ -663,17 +663,17 @@ entry: @g3 = global i8* null define void @captureStrip(i8* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@captureStrip ; TUNIT-SAME: (i8* nofree writeonly [[P:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR19]] +; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] ; TUNIT-NEXT: store i8* [[B]], i8** @g3, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync 
nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@captureStrip ; CGSCC-SAME: (i8* nofree writeonly [[P:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8* [[B]], i8** @g3, align 8 ; CGSCC-NEXT: ret void ; @@ -683,7 +683,7 @@ define void @captureStrip(i8* %p) { } define i1 @captureICmp(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@captureICmp ; CHECK-SAME: (i32* nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32* [[X]], null @@ -694,7 +694,7 @@ define i1 @captureICmp(i32* %x) { } define i1 @captureICmpRev(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@captureICmpRev ; CHECK-SAME: (i32* nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32* null, [[X]] @@ -705,7 +705,7 @@ define i1 @captureICmpRev(i32* %x) { } define i1 @nocaptureInboundsGEPICmp(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocaptureInboundsGEPICmp ; CHECK-SAME: (i32* nocapture nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i1 false @@ -717,7 +717,7 @@ define i1 @nocaptureInboundsGEPICmp(i32* %x) { } define i1 @nocaptureInboundsGEPICmpRev(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: 
nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocaptureInboundsGEPICmpRev ; CHECK-SAME: (i32* nocapture nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i1 true @@ -729,7 +729,7 @@ define i1 @nocaptureInboundsGEPICmpRev(i32* %x) { } define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocaptureDereferenceableOrNullICmp ; CHECK-SAME: (i32* nocapture nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[X]] to i8* @@ -742,14 +742,14 @@ define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x } define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) null_pointer_is_valid { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@captureDereferenceableOrNullICmp ; TUNIT-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i32* [[X]] to i8* ; TUNIT-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null ; TUNIT-NEXT: ret i1 [[TMP2]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@captureDereferenceableOrNullICmp ; CGSCC-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR12:[0-9]+]] { ; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i32* [[X]] to i8* @@ -776,16 +776,16 @@ entry: declare i8* @unknownpi8pi8(i8*,i8* returned) define i8* @test_returned1(i8* %A, 
i8* returned %B) nounwind readonly { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@test_returned1 -; TUNIT-SAME: (i8* nocapture readonly [[A:%.*]], i8* readonly returned [[B:%.*]]) #[[ATTR4]] { +; TUNIT-SAME: (i8* nocapture [[A:%.*]], i8* returned [[B:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[P:%.*]] = call i8* @unknownpi8pi8(i8* [[A]], i8* [[B]]) ; TUNIT-NEXT: ret i8* [[P]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@test_returned1 -; CGSCC-SAME: (i8* nocapture readonly [[A:%.*]], i8* readonly returned [[B:%.*]]) #[[ATTR5]] { +; CGSCC-SAME: (i8* nocapture [[A:%.*]], i8* returned [[B:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[P:%.*]] = call i8* @unknownpi8pi8(i8* [[A]], i8* [[B]]) ; CGSCC-NEXT: ret i8* [[P]] @@ -796,14 +796,14 @@ entry: } define i8* @test_returned2(i8* %A, i8* %B) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@test_returned2 ; TUNIT-SAME: (i8* readonly [[A:%.*]], i8* readonly [[B:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[P:%.*]] = call i8* @unknownpi8pi8(i8* readonly [[A]], i8* readonly [[B]]) #[[ATTR4]] ; TUNIT-NEXT: ret i8* [[P]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@test_returned2 ; CGSCC-SAME: (i8* readonly [[A:%.*]], i8* readonly [[B:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: @@ -843,46 +843,44 @@ define void @ptr_uses(i8* %ptr, i8* %wptr) { declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR3]] = { readonly } -; TUNIT: attributes #[[ATTR4]] = { nounwind readonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR3:[0-9]+]] = { memory(read) } +; TUNIT: attributes #[[ATTR4]] = { nounwind memory(read) } ; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nounwind } -; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nofree norecurse nounwind willreturn } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; TUNIT: attributes #[[ATTR10:[0-9]+]] = { nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR6]] = { nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind memory(write) } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; TUNIT: attributes #[[ATTR10:[0-9]+]] = { nounwind willreturn memory(read) } ; TUNIT: attributes #[[ATTR11]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR12:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn } -; TUNIT: attributes #[[ATTR13:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable 
willreturn } -; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR15]] = { nofree nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR17]] = { nounwind } +; TUNIT: attributes #[[ATTR12:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR13:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR15]] = { nofree nounwind willreturn } +; TUNIT: attributes #[[ATTR16]] = { nounwind } +; TUNIT: attributes #[[ATTR17]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR18]] = { willreturn } -; TUNIT: attributes #[[ATTR19]] = { readnone willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR4]] = { readonly } -; CGSCC: attributes #[[ATTR5]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { memory(read) } +; CGSCC: attributes #[[ATTR5]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind 
willreturn memory(read) } ; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR8]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nounwind } -; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR11]] = { argmemonly nofree norecurse nounwind willreturn } -; CGSCC: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; CGSCC: attributes #[[ATTR13:[0-9]+]] = { nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR9]] = { nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind memory(write) } +; CGSCC: attributes #[[ATTR11]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; CGSCC: attributes #[[ATTR13:[0-9]+]] = { nounwind willreturn memory(read) } ; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR15:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn } -; CGSCC: attributes #[[ATTR16:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } -; CGSCC: attributes #[[ATTR17]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR18]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR19]] = { nounwind } -; CGSCC: attributes #[[ATTR20]] = { willreturn } +; CGSCC: attributes #[[ATTR15:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR16:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CGSCC: attributes #[[ATTR17]] = { willreturn } +; CGSCC: attributes #[[ATTR18]] = { nounwind } +; CGSCC: attributes #[[ATTR19]] = { nofree nosync nounwind } ;. 
diff --git a/llvm/test/Transforms/Attributor/nocapture-2.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll index f54b15ba6c5d4..ea6fc175b97b2 100644 --- a/llvm/test/Transforms/Attributor/nocapture-2.ll +++ b/llvm/test/Transforms/Attributor/nocapture-2.ll @@ -16,7 +16,7 @@ declare i32* @unknown() ; ; no-capture is missing on %p because it is not dereferenceable define i32 @is_null_return(i32* %p) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@is_null_return ; CHECK-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -42,7 +42,7 @@ entry: ; ; no-capture is missing on %p because it is not dereferenceable define i32 @is_null_control(i32* %p) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@is_null_control ; CHECK-SAME: (i32* nofree [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -99,7 +99,7 @@ return: ; preds = %if.end3, %if.then2, ; } ; define double* @srec0(double* %a) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec0 ; CHECK-SAME: (double* nocapture nofree readnone [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -124,7 +124,7 @@ entry: ; Other arguments are possible here due to the no-return behavior. 
; define i32* @srec16(i32* %a) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec16 ; CHECK-SAME: (i32* nocapture nofree readnone [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -164,27 +164,49 @@ entry: ; return scc_A((int*)(scc_A(a) ? scc_B((double*)a) : scc_C(a))); ; } define float* @scc_A(i32* dereferenceable_or_null(4) %a) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@scc_A -; CHECK-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32* [[A]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.true: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* -; CHECK-NEXT: br label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32* [ [[TMP3]], [[COND_TRUE]] ], [ [[A]], [[COND_FALSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[COND]] to float* -; CHECK-NEXT: ret float* [[TMP4]] 
+; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@scc_A +; TUNIT-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i32* [[A]], null +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; TUNIT: cond.true: +; TUNIT-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* +; TUNIT-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* +; TUNIT-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* +; TUNIT-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* +; TUNIT-NEXT: br label [[COND_END:%.*]] +; TUNIT: cond.false: +; TUNIT-NEXT: br label [[COND_END]] +; TUNIT: cond.end: +; TUNIT-NEXT: [[COND:%.*]] = phi i32* [ [[TMP3]], [[COND_TRUE]] ], [ [[A]], [[COND_FALSE]] ] +; TUNIT-NEXT: [[TMP4:%.*]] = bitcast i32* [[COND]] to float* +; TUNIT-NEXT: ret float* [[TMP4]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define {{[^@]+}}@scc_A +; CGSCC-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne i32* [[A]], null +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CGSCC: cond.true: +; CGSCC-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* +; CGSCC-NEXT: 
[[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR10:[0-9]+]] +; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* +; CGSCC-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* +; CGSCC-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* +; CGSCC-NEXT: br label [[COND_END:%.*]] +; CGSCC: cond.false: +; CGSCC-NEXT: br label [[COND_END]] +; CGSCC: cond.end: +; CGSCC-NEXT: [[COND:%.*]] = phi i32* [ [[TMP3]], [[COND_TRUE]] ], [ [[A]], [[COND_FALSE]] ] +; CGSCC-NEXT: [[TMP4:%.*]] = bitcast i32* [[COND]] to float* +; CGSCC-NEXT: ret float* [[TMP4]] ; entry: %tobool = icmp ne i32* %a, null @@ -211,27 +233,49 @@ cond.end: ; preds = %cond.false, %cond.t ; FIXME: the call1 below to scc_B should return dereferenceable_or_null(8) (as the callee does). Something prevented that deduction and needs to be investigated. 
define i64* @scc_B(double* dereferenceable_or_null(8) %a) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@scc_B -; CHECK-SAME: (double* nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne double* [[A]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.true: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i32* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i16* -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: br label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i8* [ [[CALL2]], [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[COND]] to i64* -; CHECK-NEXT: ret i64* [[TMP4]] +; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@scc_B +; TUNIT-SAME: (double* nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne double* [[A]], null +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; TUNIT: cond.true: +; TUNIT-NEXT: [[TMP0:%.*]] = 
bitcast double* [[A]] to i32* +; TUNIT-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* +; TUNIT-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i16* +; TUNIT-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR9]] +; TUNIT-NEXT: br label [[COND_END:%.*]] +; TUNIT: cond.false: +; TUNIT-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* +; TUNIT-NEXT: br label [[COND_END]] +; TUNIT: cond.end: +; TUNIT-NEXT: [[COND:%.*]] = phi i8* [ [[CALL2]], [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +; TUNIT-NEXT: [[TMP4:%.*]] = bitcast i8* [[COND]] to i64* +; TUNIT-NEXT: ret i64* [[TMP4]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define {{[^@]+}}@scc_B +; CGSCC-SAME: (double* nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne double* [[A]], null +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CGSCC: cond.true: +; CGSCC-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i32* +; CGSCC-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* +; CGSCC-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP2:%.*]] = 
bitcast i64* [[CALL1]] to i16* +; CGSCC-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR10]] +; CGSCC-NEXT: br label [[COND_END:%.*]] +; CGSCC: cond.false: +; CGSCC-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* +; CGSCC-NEXT: br label [[COND_END]] +; CGSCC: cond.end: +; CGSCC-NEXT: [[COND:%.*]] = phi i8* [ [[CALL2]], [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +; CGSCC-NEXT: [[TMP4:%.*]] = bitcast i8* [[COND]] to i64* +; CGSCC-NEXT: ret i64* [[TMP4]] ; entry: %tobool = icmp ne double* %a, null @@ -257,29 +301,53 @@ cond.end: ; preds = %cond.false, %cond.t } define i8* @scc_C(i16* dereferenceable_or_null(2) %a) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@scc_C -; CHECK-SAME: (i16* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BC:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[BC]]) #[[ATTR2]] -; CHECK-NEXT: [[BC2:%.*]] = bitcast float* [[CALL]] to i8* -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[BC2]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.true: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* -; CHECK-NEXT: br label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A]]) #[[ATTR2]] -; CHECK-NEXT: br 
label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* -; CHECK-NEXT: [[CALL3:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* -; CHECK-NEXT: ret i8* [[TMP3]] +; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@scc_C +; TUNIT-SAME: (i16* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BC:%.*]] = bitcast i16* [[A]] to i32* +; TUNIT-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[BC]]) #[[ATTR9]] +; TUNIT-NEXT: [[BC2:%.*]] = bitcast float* [[CALL]] to i8* +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[BC2]], null +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; TUNIT: cond.true: +; TUNIT-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* +; TUNIT-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* +; TUNIT-NEXT: br label [[COND_END:%.*]] +; TUNIT: cond.false: +; TUNIT-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: br label [[COND_END]] +; TUNIT: cond.end: +; TUNIT-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] +; TUNIT-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* +; TUNIT-NEXT: [[CALL3:%.*]] = call dereferenceable_or_null(4) float* 
@scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* +; TUNIT-NEXT: ret i8* [[TMP3]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define {{[^@]+}}@scc_C +; CGSCC-SAME: (i16* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BC:%.*]] = bitcast i16* [[A]] to i32* +; CGSCC-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[BC]]) #[[ATTR10]] +; CGSCC-NEXT: [[BC2:%.*]] = bitcast float* [[CALL]] to i8* +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[BC2]], null +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CGSCC: cond.true: +; CGSCC-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* +; CGSCC-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* +; CGSCC-NEXT: br label [[COND_END:%.*]] +; CGSCC: cond.false: +; CGSCC-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A]]) #[[ATTR10]] +; CGSCC-NEXT: br label [[COND_END]] +; CGSCC: cond.end: +; CGSCC-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] +; CGSCC-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* +; CGSCC-NEXT: [[CALL3:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* +; CGSCC-NEXT: ret i8* [[TMP3]] ; entry: %bc = bitcast i16* %a to i32* @@ -360,7 +428,7 @@ 
declare i32 @printf(i8* nocapture, ...) ; ; There should *not* be a no-capture attribute on %a define i64* @not_captured_but_returned_0(i64* %a) #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CHECK-LABEL: define {{[^@]+}}@not_captured_but_returned_0 ; CHECK-SAME: (i64* nofree noundef nonnull returned writeonly align 8 dereferenceable(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: @@ -381,7 +449,7 @@ entry: ; ; There should *not* be a no-capture attribute on %a define i64* @not_captured_but_returned_1(i64* %a) #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CHECK-LABEL: define {{[^@]+}}@not_captured_but_returned_1 ; CHECK-SAME: (i64* nofree nonnull writeonly align 8 dereferenceable(16) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -403,20 +471,20 @@ entry: ; } ; define void @test_not_captured_but_returned_calls(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test_not_captured_but_returned_calls ; TUNIT-SAME: (i64* nocapture nofree writeonly align 8 [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9:[0-9]+]] -; TUNIT-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call i64* 
@not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10:[0-9]+]] +; TUNIT-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test_not_captured_but_returned_calls ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10:[0-9]+]] -; CGSCC-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11:[0-9]+]] +; CGSCC-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: ret void ; entry: @@ -433,18 +501,18 @@ entry: ; ; There should *not* be a no-capture attribute on %a define i64* @negative_test_not_captured_but_returned_call_0a(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0a ; TUNIT-SAME: (i64* nofree returned writeonly align 8 "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i64* 
@not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i64* [[A]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0a ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: ret i64* [[CALL]] ; entry: @@ -460,20 +528,20 @@ entry: ; ; There should *not* be a no-capture attribute on %a define void @negative_test_not_captured_but_returned_call_0b(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0b ; TUNIT-SAME: (i64* nofree writeonly align 8 [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: [[TMP0:%.*]] = ptrtoint i64* 
[[A]] to i64 ; TUNIT-NEXT: store i64 [[TMP0]], i64* [[A]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0b ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64 ; CGSCC-NEXT: store i64 [[TMP0]], i64* [[A]], align 8 ; CGSCC-NEXT: ret void @@ -493,18 +561,18 @@ entry: ; ; There should *not* be a no-capture attribute on %a define i64* @negative_test_not_captured_but_returned_call_1a(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_1a ; TUNIT-SAME: (i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i64* [[CALL]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind 
willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_1a ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: ret i64* [[CALL]] ; entry: @@ -520,20 +588,20 @@ entry: ; ; There should *not* be a no-capture attribute on %a define void @negative_test_not_captured_but_returned_call_1b(i64* %a) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_1b ; TUNIT-SAME: (i64* nofree writeonly align 8 [[A:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64 ; TUNIT-NEXT: store i64 [[TMP0]], i64* [[CALL]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(write) uwtable ; CGSCC-LABEL: define 
{{[^@]+}}@negative_test_not_captured_but_returned_call_1b ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64 ; CGSCC-NEXT: store i64 [[TMP0]], i64* [[CALL]], align 8 ; CGSCC-NEXT: ret void @@ -619,18 +687,18 @@ r: declare i32* @readonly_unknown(i32*, i32*) readonly define void @not_captured_by_readonly_call(i32* %b) #0 { -; TUNIT: Function Attrs: noinline nounwind readonly uwtable +; TUNIT: Function Attrs: noinline nounwind memory(read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call ; TUNIT-SAME: (i32* nocapture readonly [[B:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: noinline nounwind readonly uwtable +; CGSCC: Function Attrs: noinline nounwind memory(read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call ; CGSCC-SAME: (i32* nocapture readonly [[B:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) #[[ATTR7:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) ; CGSCC-NEXT: ret void ; entry: @@ -644,18 +712,18 @@ entry: ; Make sure the returned flag on %r is strong enough to justify nocapture on %b but **not** on %r. 
; define i32* @not_captured_by_readonly_call_not_returned_either1(i32* %b, i32* returned %r) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either1 ; TUNIT-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR8]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either1 ; CGSCC-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR9]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR12:[0-9]+]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -665,18 +733,18 @@ entry: declare i32* @readonly_unknown_r1a(i32*, i32* returned) readonly define i32* @not_captured_by_readonly_call_not_returned_either2(i32* %b, i32* %r) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either2 ; TUNIT-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR8]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; 
CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either2 ; CGSCC-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR9]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR12]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -686,18 +754,18 @@ entry: declare i32* @readonly_unknown_r1b(i32*, i32* returned) readonly nounwind define i32* @not_captured_by_readonly_call_not_returned_either3(i32* %b, i32* %r) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either3 ; TUNIT-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR8]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either3 ; CGSCC-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR9]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR12]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -706,18 +774,18 @@ entry: } define i32* @not_captured_by_readonly_call_not_returned_either4(i32* %b, i32* %r) nounwind { -; TUNIT: Function Attrs: nounwind readonly 
+; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either4 ; TUNIT-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR6]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either4 ; CGSCC-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR7]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -743,19 +811,12 @@ entry: declare i32* @readonly_i32p(i32*) readonly define void @nocapture_is_not_subsumed_2(i32* nocapture %b) { -; TUNIT-LABEL: define {{[^@]+}}@nocapture_is_not_subsumed_2 -; TUNIT-SAME: (i32* nocapture [[B:%.*]]) { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_i32p(i32* readonly [[B]]) #[[ATTR6]] -; TUNIT-NEXT: store i32 0, i32* [[CALL]], align 4 -; TUNIT-NEXT: ret void -; -; CGSCC-LABEL: define {{[^@]+}}@nocapture_is_not_subsumed_2 -; CGSCC-SAME: (i32* nocapture [[B:%.*]]) { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_i32p(i32* readonly [[B]]) #[[ATTR7]] -; CGSCC-NEXT: store i32 0, i32* [[CALL]], align 4 -; CGSCC-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@nocapture_is_not_subsumed_2 +; CHECK-SAME: (i32* nocapture [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32* @readonly_i32p(i32* readonly [[B]]) +; CHECK-NEXT: store i32 0, i32* [[CALL]], align 4 +; 
CHECK-NEXT: ret void ; entry: %call = call i32* @readonly_i32p(i32* %b) @@ -765,26 +826,30 @@ entry: attributes #0 = { noinline nounwind uwtable } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind readnone } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind memory(none) } ; TUNIT: attributes #[[ATTR3]] = { noinline nounwind uwtable } -; TUNIT: attributes #[[ATTR4]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; TUNIT: attributes #[[ATTR5]] = { nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; TUNIT: attributes #[[ATTR6]] = { readonly } -; TUNIT: attributes #[[ATTR7]] = { noinline nounwind readonly uwtable } -; TUNIT: attributes #[[ATTR8]] = { nounwind readonly } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; TUNIT: attributes #[[ATTR5]] = { nofree noinline norecurse nosync nounwind willreturn memory(write) uwtable } +; TUNIT: attributes #[[ATTR6:[0-9]+]] = { memory(read) } +; TUNIT: attributes #[[ATTR7]] = { noinline nounwind memory(read) uwtable } +; TUNIT: attributes #[[ATTR8]] = { nounwind memory(read) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind memory(none) } ; CGSCC: attributes #[[ATTR3]] = { noinline nounwind uwtable } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR6]] = { nofree noinline nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR7]] = { readonly } -; CGSCC: attributes #[[ATTR8]] = { noinline nounwind readonly uwtable } -; CGSCC: attributes #[[ATTR9]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR10]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR4]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; CGSCC: attributes #[[ATTR5]] = { nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable } +; CGSCC: attributes #[[ATTR6]] = { nofree noinline nosync nounwind willreturn memory(write) uwtable } +; CGSCC: attributes #[[ATTR7:[0-9]+]] = { memory(read) } +; CGSCC: attributes #[[ATTR8]] = { noinline nounwind memory(read) uwtable } +; CGSCC: attributes #[[ATTR9]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR12]] = { nounwind } ;. 
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll index 41355326dc910..c22fcfde8a71b 100644 --- a/llvm/test/Transforms/Attributor/nodelete.ll +++ b/llvm/test/Transforms/Attributor/nodelete.ll @@ -6,13 +6,13 @@ %"b" = type { i8 } define hidden i64 @f1() align 2 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i64 undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 { ; CGSCC-NEXT: entry: @@ -27,7 +27,7 @@ entry: } define internal i64 @f2(%"a"* %this) align 2 { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f2 ; CGSCC-SAME: () #[[ATTR0]] align 2 { ; CGSCC-NEXT: entry: @@ -43,7 +43,7 @@ entry: } define internal void @f3(%"b"* %this) align 2 { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f3 ; CGSCC-SAME: () #[[ATTR0]] align 2 { ; CGSCC-NEXT: entry: @@ -58,7 +58,7 @@ entry: } define internal i1 @f4(%"b"* %this) align 2 { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f4 ; CGSCC-SAME: () #[[ATTR0]] align 2 { ; CGSCC-NEXT: entry: @@ -73,7 +73,7 @@ entry: } define internal %"a"* @f5(%"b"* %this) align 2 { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f5 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] align 2 { ; CGSCC-NEXT: entry: @@ -87,9 +87,9 @@ entry: ret %"a"* %0 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll index 13bfde5277b86..9eb23ab793d12 100644 --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -15,7 +15,7 @@ declare void @_ZdaPv(i8*) local_unnamed_addr #2 ; TEST 1 (positive case) define void @only_return() #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@only_return ; CHECK-SAME: () #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: ret void @@ -104,12 +104,12 @@ end: define void @mutual_recursion1() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion1 ; TUNIT-SAME: () #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind 
willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion1 ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: ret void @@ -119,12 +119,12 @@ define void @mutual_recursion1() #0 { } define void @mutual_recursion2() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion2 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion2 ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: ret void @@ -182,17 +182,17 @@ define noalias i8* @call_realloc(i8* nocapture %0, i64 %1) local_unnamed_addr #0 ; Call function declaration with "nofree" -; CHECK: Function Attrs: nofree noinline nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline nounwind memory(none) uwtable ; CHECK-NEXT: declare void @nofree_function() declare void @nofree_function() nofree readnone #0 define void @call_nofree_function() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@call_nofree_function ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@call_nofree_function ; CGSCC-SAME: () #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: ret void @@ -240,12 +240,12 @@ define void @call_both() #0 { ; TEST 10 (positive case) ; Call intrinsic function -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone speculatable 
willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; CHECK-NEXT: declare float @llvm.floor.f32(float) declare float @llvm.floor.f32(float) define void @call_floor(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@call_floor ; CHECK-SAME: (float [[A:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: ret void @@ -255,7 +255,7 @@ define void @call_floor(float %a) #0 { } define float @call_floor2(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@call_floor2 ; CHECK-SAME: (float [[A:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR11:[0-9]+]] @@ -269,12 +269,12 @@ define float @call_floor2(float %a) #0 { ; Check propagation. 
define void @f1() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: () #[[ATTR5]] { ; CGSCC-NEXT: ret void @@ -284,12 +284,12 @@ define void @f1() #0 { } define void @f2() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@f2 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f2 ; CGSCC-SAME: () #[[ATTR5]] { ; CGSCC-NEXT: ret void @@ -357,7 +357,7 @@ define void @nonnull_assume_pos(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_pos ; CHECK-SAME: (i8* nofree [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* nofree [[ARG3:%.*]], i8* [[ARG4:%.*]]) { -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR12:[0-9]+]] [ "nofree"(i8* [[ARG1]]), "nofree"(i8* [[ARG3]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR11]] [ "nofree"(i8* [[ARG1]]), "nofree"(i8* [[ARG3]]) ] ; CHECK-NEXT: call void @unknown(i8* nofree [[ARG1]], i8* [[ARG2]], i8* nofree [[ARG3]], i8* [[ARG4]]) ; CHECK-NEXT: ret void ; @@ -440,28 +440,26 @@ attributes #2 = { nobuiltin nounwind } ; TUNIT: attributes #[[ATTR0]] = { nounwind } ; TUNIT: attributes #[[ATTR1]] = { noinline nounwind uwtable } ; TUNIT: attributes #[[ATTR2]] = { nobuiltin 
nounwind } -; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nofree noinline nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nofree noinline nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; TUNIT: attributes #[[ATTR7]] = { nofree nounwind } ; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nobuiltin nofree nounwind } -; TUNIT: attributes #[[ATTR9:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; TUNIT: attributes #[[ATTR10:[0-9]+]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR11]] = { readnone willreturn } -; TUNIT: attributes #[[ATTR12]] = { willreturn } +; TUNIT: attributes #[[ATTR11]] = { willreturn } ;. 
; CGSCC: attributes #[[ATTR0]] = { nounwind } ; CGSCC: attributes #[[ATTR1]] = { noinline nounwind uwtable } ; CGSCC: attributes #[[ATTR2]] = { nobuiltin nounwind } -; CGSCC: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nofree noinline nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR5]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CGSCC: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nofree noinline nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR5]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CGSCC: attributes #[[ATTR7]] = { nofree nounwind } ; CGSCC: attributes #[[ATTR8:[0-9]+]] = { nobuiltin nofree nounwind } -; CGSCC: attributes #[[ATTR9:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CGSCC: attributes #[[ATTR10:[0-9]+]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR11]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR12]] = { willreturn } +; CGSCC: attributes #[[ATTR11]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index 3bea83442d24d..5a1e37cc73133 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -20,7 +20,7 @@ define i8* @test1() { ; Return a pointer trivially nonnull (argument attribute) define i8* @test2(i8* nonnull %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i8* nofree nonnull readnone returned "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i8* [[P]] @@ -29,7 +29,7 @@ define i8* @test2(i8* nonnull %p) { } define i8* @test2A(i1 %c, i8* %ret) { -; CHECK: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test2A ; CHECK-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -50,7 +50,7 @@ B: } define i8* @test2B(i1 %c, i8* %ret) { -; CHECK: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test2B ; CHECK-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -106,12 +106,12 @@ define i8* @test3(i1 %c) { ; nonnull if neither can ever return null. (In this case, they ; just never return period.) 
define i8* @test4_helper() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4_helper ; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: ret i8* undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4_helper ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i8* undef @@ -121,12 +121,12 @@ define i8* @test4_helper() { } define i8* @test4() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4 ; TUNIT-SAME: () #[[ATTR3]] { ; TUNIT-NEXT: ret i8* undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i8* undef @@ -138,7 +138,7 @@ define i8* @test4() { ; Given a mutual recursive set of functions which *can* return null ; make sure we haven't marked them as nonnull. 
define i8* @test5_helper(i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test5_helper ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] @@ -147,7 +147,7 @@ define i8* @test5_helper(i1 %c) { ; TUNIT: end: ; TUNIT-NEXT: ret i8* null ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test5_helper ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] @@ -165,12 +165,12 @@ end: } define i8* @test5(i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test5 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: ret i8* null ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test5 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: ret i8* null @@ -237,7 +237,7 @@ exit: } define i8* @test7(i8* %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test7 ; CHECK-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i8* [[A]] @@ -247,7 +247,7 @@ define i8* @test7(i8* %a) { } define i8* @test8(i8* %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test8 ; CHECK-SAME: (i8* nofree 
readnone "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 1 @@ -258,7 +258,7 @@ define i8* @test8(i8* %a) { } define i8* @test9(i8* %a, i64 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test9 ; CHECK-SAME: (i8* nofree readnone "no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[N]] @@ -271,7 +271,7 @@ define i8* @test9(i8* %a, i64 %n) { ; ATTRIBUTOR_OPM: define i8* @test10 ; ATTRIBUTOR_NPM: define nonnull i8* @test10 define i8* @test10(i8* %a, i64 %n) { -; CHECK: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test10 ; CHECK-SAME: (i8* nofree readnone "no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[N]], 0 @@ -391,7 +391,7 @@ declare nonnull i8* @nonnull() define internal i32* @f1(i32* %arg) { ; FIXME: missing nonnull It should be nonnull @f1(i32* nonnull readonly %arg) -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind readonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -413,7 +413,7 @@ define internal i32* @f1(i32* %arg) { ; TUNIT-NEXT: [[TMP10:%.*]] = phi i32* [ [[TMP5C]], [[BB4]] ], [ inttoptr (i64 4 to i32*), [[BB:%.*]] ] ; TUNIT-NEXT: ret i32* [[TMP10]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: (i32* nofree readonly 
[[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -461,14 +461,14 @@ bb9: ; preds = %bb4, %bb } define internal i32* @f2(i32* %arg) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind readonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@f2 ; TUNIT-SAME: (i32* nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: [[TMP:%.*]] = tail call i32* @f1(i32* nofree readonly [[ARG]]) #[[ATTR14]] ; TUNIT-NEXT: ret i32* [[TMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f2 ; CGSCC-SAME: (i32* nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: bb: @@ -482,14 +482,14 @@ bb: define dso_local noalias i32* @f3(i32* %arg) { ; FIXME: missing nonnull. It should be nonnull @f3(i32* nonnull readonly %arg) -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind readonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@f3 ; TUNIT-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: [[TMP:%.*]] = call i32* @f1(i32* nofree readonly [[ARG]]) #[[ATTR14]] ; TUNIT-NEXT: ret i32* [[TMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f3 ; CGSCC-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: bb: @@ -857,7 +857,7 @@ define i8 @parent6(i8* %a, i8* %b) { define i8 @parent7(i8* %a) { ; CHECK-LABEL: define {{[^@]+}}@parent7 ; CHECK-SAME: (i8* nonnull [[A:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i8 @use1safecall(i8* nonnull readonly [[A]]) #[[ATTR15:[0-9]+]] +; CHECK-NEXT: [[RET:%.*]] = call i8 @use1safecall(i8* nonnull readonly [[A]]) #[[ATTR13]] ; CHECK-NEXT: call void 
@use1nonnull(i8* nonnull [[A]]) ; CHECK-NEXT: ret i8 [[RET]] ; @@ -915,7 +915,7 @@ exc: } define i32* @gep1(i32* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@gep1 ; CHECK-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 1 @@ -927,13 +927,13 @@ define i32* @gep1(i32* %p) { define i32* @gep1_no_null_opt(i32* %p) #0 { ; Should't be able to derive nonnull based on gep. -; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@gep1_no_null_opt ; TUNIT-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 1 ; TUNIT-NEXT: ret i32* [[Q]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@gep1_no_null_opt ; CGSCC-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 1 @@ -944,7 +944,7 @@ define i32* @gep1_no_null_opt(i32* %p) #0 { } define i32 addrspace(3)* @gep2(i32 addrspace(3)* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@gep2 ; CHECK-SAME: (i32 addrspace(3)* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* [[P]], 
i32 1 @@ -956,7 +956,7 @@ define i32 addrspace(3)* @gep2(i32 addrspace(3)* %p) { ; FIXME: We should propagate dereferenceable here but *not* nonnull define i32 addrspace(3)* @as(i32 addrspace(3)* dereferenceable(4) %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@as ; CHECK-SAME: (i32 addrspace(3)* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i32 addrspace(3)* [[P]] @@ -966,7 +966,7 @@ define i32 addrspace(3)* @as(i32 addrspace(3)* dereferenceable(4) %p) { ; CHECK-NOT: @g2() define internal i32* @g2() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@g2 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i32* inttoptr (i64 4 to i32*) @@ -975,15 +975,15 @@ define internal i32* @g2() { } define i32* @g1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@g1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32* inttoptr (i64 4 to i32*) ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@g1 ; CGSCC-SAME: () #[[ATTR9:[0-9]+]] { -; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull align 4 i32* @g2() #[[ATTR16:[0-9]+]] +; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull align 4 i32* @g2() #[[ATTR13]] ; CGSCC-NEXT: ret i32* [[C]] ; %c = call i32* @g2() @@ -1391,10 +1391,10 @@ declare i8* @strrchr(i8* %0, i32 %1) nofree nounwind readonly willreturn ; We should not mark the return of @strrchr as `nonnull`, it may well be NULL! 
define i8* @mybasename(i8* nofree readonly %str) { -; CHECK: Function Attrs: nofree nounwind readonly willreturn +; CHECK: Function Attrs: nofree nounwind willreturn memory(read) ; CHECK-LABEL: define {{[^@]+}}@mybasename ; CHECK-SAME: (i8* nofree readonly [[STR:%.*]]) #[[ATTR12:[0-9]+]] { -; CHECK-NEXT: [[CALL:%.*]] = call i8* @strrchr(i8* nofree readonly [[STR]], i32 noundef 47) #[[ATTR15]] +; CHECK-NEXT: [[CALL:%.*]] = call i8* @strrchr(i8* nofree readonly [[STR]], i32 noundef 47) #[[ATTR13]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[CALL]], null ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 1 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i8* [[ADD_PTR]], i8* [[STR]] @@ -1486,7 +1486,7 @@ declare void @use_i8_ptr(i8* nofree nocapture readnone) nounwind declare void @use_i8_ptr_ret(i8* nofree nocapture readnone) nounwind willreturn define i8* @nonnull_function_ptr_1() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nonnull_function_ptr_1 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i8* bitcast (i8* ()* @nonnull_function_ptr_1 to i8*) @@ -1497,7 +1497,7 @@ define i8* @nonnull_function_ptr_1() { declare i8* @function_decl() define i8* @nonnull_function_ptr_2() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nonnull_function_ptr_2 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i8* bitcast (i8* ()* @function_decl to i8*) @@ -1522,38 +1522,35 @@ declare void @nonnull_callee(i8* nonnull %p) attributes #0 = { null_pointer_is_valid } attributes #1 = { nounwind willreturn} ;. 
-; TUNIT: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR4]] = { noreturn } ; TUNIT: attributes #[[ATTR5]] = { nounwind } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nofree nosync nounwind readonly } +; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind memory(argmem: read) } ; TUNIT: attributes #[[ATTR7]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } +; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } ; TUNIT: attributes #[[ATTR10]] = { naked } ; TUNIT: attributes #[[ATTR11]] = { noinline optnone } -; TUNIT: attributes #[[ATTR12]] = { nofree nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR12]] = { nofree nounwind willreturn memory(read) } ; TUNIT: attributes #[[ATTR13]] = { willreturn } -; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind readonly } -; TUNIT: attributes #[[ATTR15]] = { readonly willreturn } +; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind } ;. 
-; CGSCC: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CGSCC: attributes #[[ATTR3]] = { noreturn } ; CGSCC: attributes #[[ATTR4]] = { nounwind } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind readonly } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind memory(argmem: read) } ; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR7:[0-9]+]] = { nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR7:[0-9]+]] = { nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR10]] = { naked } ; CGSCC: attributes #[[ATTR11]] = { noinline optnone } -; CGSCC: attributes #[[ATTR12]] = { nofree nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR12]] = { nofree nounwind willreturn memory(read) } ; CGSCC: attributes #[[ATTR13]] = { willreturn } -; CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind readonly } -; CGSCC: attributes #[[ATTR15]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR16]] = { readnone willreturn } +; 
CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll index 8361c4c1547e3..ba9cbb9dbaac2 100644 --- a/llvm/test/Transforms/Attributor/norecurse.ll +++ b/llvm/test/Transforms/Attributor/norecurse.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define i32 @leaf() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@leaf ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -12,7 +12,7 @@ define i32 @leaf() { } define i32 @self_rec() { -; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@self_rec ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i32 4 @@ -22,12 +22,12 @@ define i32 @self_rec() { } define i32 @indirect_rec() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@indirect_rec ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@indirect_rec ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 undef @@ -36,12 +36,12 @@ define i32 @indirect_rec() { ret i32 %a } define i32 @indirect_rec2() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@indirect_rec2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: 
ret i32 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@indirect_rec2 ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 undef @@ -51,7 +51,7 @@ define i32 @indirect_rec2() { } define i32 @extern() { -; CHECK: Function Attrs: nosync readnone +; CHECK: Function Attrs: nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@extern ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -66,7 +66,7 @@ define i32 @extern() { declare i32 @k() readnone define void @intrinsic(i8* %dest, i8* %src, i32 %len) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@intrinsic ; CHECK-SAME: (i8* nocapture nofree writeonly [[DEST:%.*]], i8* nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR9:[0-9]+]] @@ -81,7 +81,7 @@ define void @intrinsic(i8* %dest, i8* %src, i32 %len) { declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define internal i32 @called_by_norecurse() { -; CHECK: Function Attrs: norecurse nosync readnone +; CHECK: Function Attrs: norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@called_by_norecurse ; CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -91,13 +91,13 @@ define internal i32 @called_by_norecurse() { ret i32 %a } define void @m() norecurse { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@m ; TUNIT-SAME: () #[[ATTR6]] { -; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() 
#[[ATTR2]] +; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@m ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() @@ -108,13 +108,13 @@ define void @m() norecurse { } define internal i32 @called_by_norecurse_indirectly() { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: [[A:%.*]] = call i32 @k() ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: nosync readnone +; CGSCC: Function Attrs: nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @k() @@ -124,13 +124,13 @@ define internal i32 @called_by_norecurse_indirectly() { ret i32 %a } define internal i32 @o() { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@o ; TUNIT-SAME: () #[[ATTR6]] { -; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() #[[ATTR2]] +; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() #[[ATTR10]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@o ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() @@ -140,13 +140,13 @@ define internal i32 @o() { ret i32 %a } define i32 @p() norecurse { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@p ; TUNIT-SAME: () #[[ATTR6]] { -; TUNIT-NEXT: [[A:%.*]] = call i32 @o() #[[ATTR2]] +; TUNIT-NEXT: 
[[A:%.*]] = call i32 @o() #[[ATTR10]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@p ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @o() @@ -157,7 +157,7 @@ define i32 @p() norecurse { } define void @f(i32 %x) { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f ; TUNIT-SAME: (i32 [[X:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -170,7 +170,7 @@ define void @f(i32 %x) { ; TUNIT: if.end: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f ; CGSCC-SAME: (i32 [[X:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -198,7 +198,7 @@ if.end: } define void @g() norecurse { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@g ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -241,7 +241,7 @@ define i32 @eval_func2(i32 (i32)* , i32) local_unnamed_addr null_pointer_is_vali ; Call an unknown function in a dead block. declare void @unknown() define i32 @call_unknown_in_dead_block() local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@call_unknown_in_dead_block ; CHECK-SAME: () local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: ret i32 0 @@ -307,14 +307,26 @@ f: } ;. 
-; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR2]] = { nosync readnone } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { readnone } -; CHECK: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR6]] = { norecurse nosync readnone } -; CHECK: attributes #[[ATTR7]] = { null_pointer_is_valid } -; CHECK: attributes #[[ATTR8:[0-9]+]] = { norecurse } -; CHECK: attributes #[[ATTR9]] = { willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nosync memory(none) } +; TUNIT: attributes #[[ATTR3:[0-9]+]] = { memory(none) } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR6]] = { norecurse nosync memory(none) } +; TUNIT: attributes #[[ATTR7]] = { null_pointer_is_valid } +; TUNIT: attributes #[[ATTR8]] = { norecurse } +; TUNIT: attributes #[[ATTR9]] = { willreturn } +; TUNIT: attributes #[[ATTR10]] = { nosync } +;. 
+; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nosync memory(none) } +; CGSCC: attributes #[[ATTR3:[0-9]+]] = { memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR6]] = { norecurse nosync memory(none) } +; CGSCC: attributes #[[ATTR7]] = { null_pointer_is_valid } +; CGSCC: attributes #[[ATTR8]] = { norecurse } +; CGSCC: attributes #[[ATTR9]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/noreturn.ll b/llvm/test/Transforms/Attributor/noreturn.ll index 8d1d275c31f99..d3ef7f354d36c 100644 --- a/llvm/test/Transforms/Attributor/noreturn.ll +++ b/llvm/test/Transforms/Attributor/noreturn.ll @@ -15,7 +15,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; } ; define void @srec0() #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -34,7 +34,7 @@ entry: ; } ; define i32 @srec16(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline noreturn nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec16 ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -73,7 +73,7 @@ exit: ; } ; define i32 @endless_loop(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable ; 
CHECK-LABEL: define {{[^@]+}}@endless_loop ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: @@ -98,7 +98,7 @@ while.body: ; preds = %entry, %while.body ; ; FIXME: no-return missing (D65243 should fix this) define i32 @dead_return(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@dead_return ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -126,7 +126,7 @@ return: ; No predecessors! ; } ; define i32 @multiple_noreturn_calls(i32 %a) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse noreturn nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@multiple_noreturn_calls ; TUNIT-SAME: (i32 [[A:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -139,7 +139,7 @@ define i32 @multiple_noreturn_calls(i32 %a) #0 { ; TUNIT: cond.end: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noinline noreturn nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@multiple_noreturn_calls ; CGSCC-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -174,7 +174,7 @@ cond.end: ; preds = %cond.false, %cond.t ; FIXME: we should derive "UB" as an argument and report it to the user on request. 
define i32 @endless_loop_but_willreturn() willreturn { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@endless_loop_but_willreturn ; TUNIT-SAME: () #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -182,7 +182,7 @@ define i32 @endless_loop_but_willreturn() willreturn { ; TUNIT: while.body: ; TUNIT-NEXT: br label [[WHILE_BODY]] ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@endless_loop_but_willreturn ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -199,13 +199,13 @@ while.body: ; preds = %entry, %while.body ; TEST 6b: willreturn means *not* no-return or UB define i32 @UB_and_willreturn() willreturn { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@UB_and_willreturn ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@UB_and_willreturn ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: entry: @@ -217,14 +217,14 @@ entry: attributes #0 = { noinline nounwind uwtable } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse noreturn nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse noreturn nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR3]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/noreturn_async.ll b/llvm/test/Transforms/Attributor/noreturn_async.ll index cae672f644a17..859c8f69b0dde 100644 --- a/llvm/test/Transforms/Attributor/noreturn_async.ll +++ b/llvm/test/Transforms/Attributor/noreturn_async.ll @@ -149,6 +149,6 @@ declare dso_local i32 @printf(i8* %_Format, ...) declare i32 @llvm.eh.exceptioncode(token) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind readnone } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/noreturn_sync.ll b/llvm/test/Transforms/Attributor/noreturn_sync.ll index 7c6c7a8272523..681dbf5980c7a 100644 --- a/llvm/test/Transforms/Attributor/noreturn_sync.ll +++ b/llvm/test/Transforms/Attributor/noreturn_sync.ll @@ -139,6 +139,6 @@ declare dso_local i32 @printf(i8* %_Format, ...) declare i32 @llvm.eh.exceptioncode(token) ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(none) } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/nosync.ll b/llvm/test/Transforms/Attributor/nosync.ll index e3a1f82156da6..324c49fe0151c 100644 --- a/llvm/test/Transforms/Attributor/nosync.ll +++ b/llvm/test/Transforms/Attributor/nosync.ll @@ -30,7 +30,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[A:[a-zA-Z0-9_$"\\.-]+]] = common global i32 0, align 4 ;. 
define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp { -; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize readnone ssp willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (%struct.ST* nofree readnone "no-capture-maybe-returned" [[S:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -50,7 +50,7 @@ entry: ; } define i32 @load_monotonic(i32* nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_monotonic ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0]] monotonic, align 4 @@ -68,7 +68,7 @@ define i32 @load_monotonic(i32* nocapture readonly %0) norecurse nounwind uwtabl ; } define void @store_monotonic(i32* nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@store_monotonic ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: store atomic i32 10, i32* [[TMP0]] monotonic, align 4 @@ -86,7 +86,7 @@ define void @store_monotonic(i32* nocapture %0) norecurse nounwind uwtable { ; } define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_acquire 
; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0]] acquire, align 4 @@ -103,7 +103,7 @@ define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable ; } define void @load_release(i32* nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_release ; CHECK-SAME: (i32* nocapture nofree noundef writeonly align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store atomic volatile i32 10, i32* [[TMP0]] release, align 4 @@ -116,7 +116,7 @@ define void @load_release(i32* nocapture %0) norecurse nounwind uwtable { ; TEST 6 - negative volatile, relaxed atomic define void @load_volatile_release(i32* nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_volatile_release ; CHECK-SAME: (i32* nocapture nofree noundef writeonly align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store atomic volatile i32 10, i32* [[TMP0]] release, align 4 @@ -133,7 +133,7 @@ define void @load_volatile_release(i32* nocapture %0) norecurse nounwind uwtable ; } define void @volatile_store(i32* %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@volatile_store ; CHECK-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store volatile i32 14, i32* [[TMP0]], align 4 @@ -151,7 +151,7 @@ define void @volatile_store(i32* %0) norecurse nounwind uwtable { 
; } define i32 @volatile_load(i32* %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@volatile_load ; CHECK-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP0]], align 4 @@ -199,14 +199,14 @@ define void @call_might_sync() nounwind uwtable noinline { ; volatile operation in same scc but dead. Call volatile_load defined in TEST 8. define i32 @scc1(i32* %0) noinline nounwind uwtable { -; TUNIT: Function Attrs: argmemonly nofree noinline nounwind uwtable +; TUNIT: Function Attrs: nofree noinline nounwind memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc1 ; TUNIT-SAME: (i32* nofree [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: tail call void @scc2(i32* nofree [[TMP0]]) #[[ATTR19:[0-9]+]] ; TUNIT-NEXT: [[VAL:%.*]] = tail call i32 @volatile_load(i32* nofree align 4 [[TMP0]]) #[[ATTR19]] ; TUNIT-NEXT: ret i32 [[VAL]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nounwind uwtable +; CGSCC: Function Attrs: nofree noinline nounwind memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc1 ; CGSCC-SAME: (i32* nofree [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: tail call void @scc2(i32* nofree [[TMP0]]) #[[ATTR19:[0-9]+]] @@ -219,7 +219,7 @@ define i32 @scc1(i32* %0) noinline nounwind uwtable { } define void @scc2(i32* %0) noinline nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree noinline nounwind uwtable +; CHECK: Function Attrs: nofree noinline nounwind memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@scc2 ; CHECK-SAME: (i32* nofree [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @scc1(i32* nofree [[TMP0]]) #[[ATTR19:[0-9]+]] @@ -349,7 +349,7 @@ declare void @llvm.memset(i8* %dest, i8 %val, i32 %len, i1 
%isvolatile) ; It is odd to add nocapture but a result of the llvm.memcpy nocapture. ; define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@memcpy_volatile ; CHECK-SAME: (i8* nocapture nofree writeonly [[PTR1:%.*]], i8* nocapture nofree readonly [[PTR2:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[PTR1]], i8* noalias nocapture nofree readonly [[PTR2]], i32 noundef 8, i1 noundef true) #[[ATTR20:[0-9]+]] @@ -364,10 +364,10 @@ define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) { ; It is odd to add nocapture but a result of the llvm.memset nocapture. ; define i32 @memset_non_volatile(i8* %ptr1, i8 %val) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@memset_non_volatile ; CHECK-SAME: (i8* nocapture nofree writeonly [[PTR1:%.*]], i8 [[VAL:%.*]]) #[[ATTR11:[0-9]+]] { -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nocapture nofree writeonly [[PTR1]], i8 [[VAL]], i32 noundef 8, i1 noundef false) #[[ATTR21:[0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nocapture nofree writeonly [[PTR1]], i8 [[VAL]], i32 noundef 8, i1 noundef false) #[[ATTR20]] ; CHECK-NEXT: ret i32 4 ; call void @llvm.memset(i8* %ptr1, i8 %val, i32 8, i1 0) @@ -390,7 +390,7 @@ declare void @readnone_test() convergent readnone ; TEST 17 - negative. 
Convergent define void @convergent_readnone(){ -; CHECK: Function Attrs: readnone +; CHECK: Function Attrs: memory(none) ; CHECK-LABEL: define {{[^@]+}}@convergent_readnone ; CHECK-SAME: () #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: call void @readnone_test() @@ -423,7 +423,7 @@ declare float @llvm.cos(float %val) readnone ; TEST 19 - positive, readnone & non-convergent intrinsic. define i32 @cos_test(float %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cos_test ; CHECK-SAME: (float [[X:%.*]]) #[[ATTR15:[0-9]+]] { ; CHECK-NEXT: ret i32 4 @@ -433,37 +433,35 @@ define i32 @cos_test(float %x) { } define float @cos_test2(float %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cos_test2 ; CHECK-SAME: (float [[X:%.*]]) #[[ATTR15]] { -; CHECK-NEXT: [[C:%.*]] = call float @llvm.cos.f32(float [[X]]) #[[ATTR22:[0-9]+]] +; CHECK-NEXT: [[C:%.*]] = call float @llvm.cos.f32(float [[X]]) #[[ATTR20]] ; CHECK-NEXT: ret float [[C]] ; %c = call float @llvm.cos(float %x) ret float %c } ;. 
-; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind optsize readnone ssp willreturn uwtable } -; CHECK: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn uwtable } -; CHECK: attributes #[[ATTR2]] = { argmemonly nofree norecurse nounwind willreturn uwtable } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable } ; CHECK: attributes #[[ATTR3]] = { noinline nosync nounwind uwtable } ; CHECK: attributes #[[ATTR4]] = { noinline nounwind uwtable } -; CHECK: attributes #[[ATTR5]] = { argmemonly nofree noinline nounwind uwtable } +; CHECK: attributes #[[ATTR5]] = { nofree noinline nounwind memory(argmem: readwrite) uwtable } ; CHECK: attributes #[[ATTR6]] = { nofree norecurse nounwind willreturn } ; CHECK: attributes #[[ATTR7]] = { nofree norecurse nounwind } ; CHECK: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn } ; CHECK: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind } -; CHECK: attributes #[[ATTR10]] = { argmemonly nofree norecurse nounwind willreturn } -; CHECK: attributes #[[ATTR11]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR12:[0-9]+]] = { convergent readnone } -; CHECK: attributes #[[ATTR13]] = { readnone } +; CHECK: attributes #[[ATTR10]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CHECK: attributes #[[ATTR12:[0-9]+]] = { convergent memory(none) } +; CHECK: attributes #[[ATTR13]] = { memory(none) } ; CHECK: attributes #[[ATTR14]] = { nounwind } -; CHECK: attributes #[[ATTR15]] = { nofree norecurse nosync nounwind 
readnone willreturn } -; CHECK: attributes #[[ATTR16:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR17:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR18:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR15]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR16:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR17:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CHECK: attributes #[[ATTR18:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR19]] = { nofree nounwind } ; CHECK: attributes #[[ATTR20]] = { willreturn } -; CHECK: attributes #[[ATTR21]] = { willreturn writeonly } -; CHECK: attributes #[[ATTR22]] = { readnone willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/nounwind.ll b/llvm/test/Transforms/Attributor/nounwind.ll index 1b4b4d19bdb91..0e1002535dd38 100644 --- a/llvm/test/Transforms/Attributor/nounwind.ll +++ b/llvm/test/Transforms/Attributor/nounwind.ll @@ -4,7 +4,7 @@ ; TEST 1 define i32 @foo1() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -14,12 +14,12 @@ define i32 @foo1() { ; TEST 2 define i32 @scc1_foo() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@scc1_foo ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@scc1_foo ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 1 @@ -31,12 +31,12 @@ define i32 @scc1_foo() { ; TEST 3 define i32 @scc1_bar() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@scc1_bar ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@scc1_bar ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 1 @@ -145,8 +145,8 @@ declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/openmp_parallel.ll b/llvm/test/Transforms/Attributor/openmp_parallel.ll index 1d0c3c880c891..01de35d791721 100644 --- a/llvm/test/Transforms/Attributor/openmp_parallel.ll +++ b/llvm/test/Transforms/Attributor/openmp_parallel.ll @@ -69,7 +69,7 @@ define internal void @.omp_outlined.(i32* noalias nocapture readonly %.global_ti ; TUNIT-NEXT: br label [[OMP_PRECOND_THEN:%.*]] ; TUNIT: omp.precond.then: ; TUNIT-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -; TUNIT-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP0]]) +; TUNIT-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP0]]) #[[ATTR3:[0-9]+]] ; TUNIT-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* ; TUNIT-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP1]]) @@ -129,7 +129,7 @@ define internal void @.omp_outlined.(i32* noalias nocapture readonly %.global_ti ; CGSCC-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] ; CGSCC: omp.precond.then: ; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR3:[0-9]+]] ; CGSCC-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 ; CGSCC-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* ; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP2]]) @@ -264,7 +264,8 @@ attributes #2 = { nounwind } ;. 
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind uwtable } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline nofree norecurse nounwind uwtable } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { memory(readwrite) } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{!2} diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll index 5a771dcca7ec5..0b947919ec599 100644 --- a/llvm/test/Transforms/Attributor/pointer-info.ll +++ b/llvm/test/Transforms/Attributor/pointer-info.ll @@ -6,7 +6,7 @@ %struct.test.a = type { %struct.test.b, i32, i8*} define void @foo(i8* %ptr) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i8* nocapture nofree readnone [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -17,7 +17,7 @@ define void @foo(i8* %ptr) { ; TUNIT-NEXT: tail call void @bar(%struct.test.a* noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_TEST_A]]) align 8 dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -41,7 +41,7 @@ call.br: } define void @bar(%struct.test.a* noundef byval(%struct.test.a) align 8 %dev) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: 
write) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (%struct.test.a* noalias nocapture nofree noundef nonnull writeonly byval([[STRUCT_TEST_A:%.*]]) align 8 dereferenceable(24) [[DEV:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A]], %struct.test.a* [[DEV]], i64 0, i32 0 @@ -55,11 +55,11 @@ define void @bar(%struct.test.a* noundef byval(%struct.test.a) align 8 %dev) { ret void } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/potential.ll b/llvm/test/Transforms/Attributor/potential.ll index 8e8c993fa637b..977eb29f02bff 100644 --- a/llvm/test/Transforms/Attributor/potential.ll +++ b/llvm/test/Transforms/Attributor/potential.ll @@ -9,7 +9,7 @@ ; bool potential_test1(bool c) { return iszero(c ? 
1 : -1); } define internal i1 @iszero1(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@iszero1 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -20,12 +20,12 @@ define internal i1 @iszero1(i32 %c) { } define i1 @potential_test1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[ARG:%.*]] = select i1 [[C]], i32 -1, i32 1 @@ -47,7 +47,7 @@ define i1 @potential_test1(i1 %c) { ; int potential_test2(int x) { return call_with_two_values(1) + call_with_two_values(-1); } define internal i32 @iszero2(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@iszero2 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -60,7 +60,7 @@ define internal i32 @iszero2(i32 %c) { } define internal i32 @call_with_two_values(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_with_two_values ; TUNIT-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 noundef [[C]]) #[[ATTR1:[0-9]+]], !range 
[[RNG0:![0-9]+]] @@ -69,7 +69,7 @@ define internal i32 @call_with_two_values(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@call_with_two_values ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 noundef [[C]]) #[[ATTR2]] @@ -86,7 +86,7 @@ define internal i32 @call_with_two_values(i32 %c) { } define i32 @potential_test2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 noundef 1) #[[ATTR1]], !range [[RNG1:![0-9]+]] @@ -94,7 +94,7 @@ define i32 @potential_test2(i1 %c) { ; TUNIT-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 noundef 1) #[[ATTR2]] @@ -120,7 +120,7 @@ define i32 @potential_test2(i1 %c) { ; int potential_test3() { return zero_or_one(iszero(0))+zero_or_one(iszero(1)); } define internal i32 @iszero3(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@iszero3 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -133,7 +133,7 @@ define internal i32 @iszero3(i32 %c) { } define internal 
i32 @less_than_two(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_two ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 2 @@ -146,12 +146,12 @@ define internal i32 @less_than_two(i32 %c) { } define i32 @potential_test3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test3 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32 2 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test3 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[CMP1:%.*]] = call i32 @iszero3(i32 noundef 0) #[[ATTR2]] @@ -181,7 +181,7 @@ define i32 @potential_test3() { ; int potential_test7(int c) { return return1or3(c) == return3or4(c); } define i32 @potential_test4(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test4 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR1]] @@ -189,7 +189,7 @@ define i32 @potential_test4(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test4 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -204,7 +204,7 @@ define i32 @potential_test4(i32 %c) { } define i32 
@potential_test5(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test5 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR1]] @@ -213,7 +213,7 @@ define i32 @potential_test5(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test5 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -230,14 +230,14 @@ define i32 @potential_test5(i32 %c) { } define i1 @potential_test6(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test6 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR1]] ; TUNIT-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], 3 ; TUNIT-NEXT: ret i1 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test6 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -250,7 +250,7 @@ define i1 @potential_test6(i32 %c) { } define i1 @potential_test7(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test7 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] 
= call i32 @return1or3(i32 [[C]]) #[[ATTR1]] @@ -258,7 +258,7 @@ define i1 @potential_test7(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] ; TUNIT-NEXT: ret i1 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test7 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -273,7 +273,7 @@ define i1 @potential_test7(i32 %c) { } define internal i32 @return1or3(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return1or3 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -286,7 +286,7 @@ define internal i32 @return1or3(i32 %c) { } define internal i32 @return2or4(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return2or4 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -299,7 +299,7 @@ define internal i32 @return2or4(i32 %c) { } define internal i32 @return3or4(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return3or4 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -316,7 +316,7 @@ define internal i32 @return3or4(i32 %c) { ; propagate argument to callsite argument define internal i1 @cmp_with_four(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cmp_with_four ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 4 @@ -327,7 +327,7 @@ define internal i1 @cmp_with_four(i32 %c) { } define internal i1 @wrapper(i32 %c) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@wrapper ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i1 @cmp_with_four(i32 noundef [[C]]) #[[ATTR2]] @@ -338,12 +338,12 @@ define internal i1 @wrapper(i32 %c) { } define i1 @potential_test8() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test8 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test8 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RES1:%.*]] = call i1 @wrapper(i32 noundef 1) #[[ATTR2]] @@ -362,7 +362,7 @@ define i1 @potential_test8() { } define i1 @potential_test9() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test9 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -405,7 +405,7 @@ end: ; and returned value of @potential_test10 can be simplified to 0(false) define internal i32 @may_return_undef(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@may_return_undef ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch 
i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -430,14 +430,14 @@ otherwise: } define i1 @potential_test10(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test10 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @may_return_undef(i32 [[C]]) #[[ATTR1]] ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[RET]], 0 ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test10 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @may_return_undef(i32 [[C]]) #[[ATTR2]] @@ -450,7 +450,7 @@ define i1 @potential_test10(i32 %c) { } define i32 @optimize_undef_1(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_undef_1 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -469,7 +469,7 @@ f: } define i32 @optimize_undef_2(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_undef_2 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -488,7 +488,7 @@ f: } define i32 @optimize_undef_3(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_undef_3 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ 
-511,7 +511,7 @@ f: ; FIXME: returned value can be simplified to 0 define i32 @potential_test11(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test11 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[ZERO1:%.*]] = call i32 @optimize_undef_1(i1 [[C]]) #[[ATTR1]], !range [[RNG0]] @@ -521,7 +521,7 @@ define i32 @potential_test11(i1 %c) { ; TUNIT-NEXT: [[ACC2:%.*]] = add i32 [[ACC1]], [[ZERO3]] ; TUNIT-NEXT: ret i32 [[ACC2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test11 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[ZERO1:%.*]] = call i32 @optimize_undef_1(i1 [[C]]) #[[ATTR2]] @@ -540,7 +540,7 @@ define i32 @potential_test11(i1 %c) { } define i32 @optimize_poison_1(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_poison_1 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -560,12 +560,12 @@ f: ; FIXME: returned value can be simplified to 0 define i32 @potential_test12(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test12 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test12 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[ZERO:%.*]] = call i32 
@optimize_poison_1(i1 [[C]]) #[[ATTR2]] @@ -581,7 +581,7 @@ define i32 @potential_test12(i1 %c) { ; However, we should not simplify `and i32 %c, 3` to `%c` define internal i32 @potential_test13_callee(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test13_callee ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = and i32 [[C]], 3 @@ -592,13 +592,13 @@ define internal i32 @potential_test13_callee(i32 %c) { } define i32 @potential_test13_caller1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test13_caller1 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 0) #[[ATTR1]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test13_caller1 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 0) #[[ATTR2]] @@ -609,13 +609,13 @@ define i32 @potential_test13_caller1() { } define i32 @potential_test13_caller2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test13_caller2 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 1) #[[ATTR1]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define 
{{[^@]+}}@potential_test13_caller2 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 1) #[[ATTR2]] @@ -626,13 +626,13 @@ define i32 @potential_test13_caller2() { } define i32 @potential_test13_caller3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test13_caller3 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 undef) #[[ATTR1]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test13_caller3 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 undef) #[[ATTR2]] @@ -643,7 +643,7 @@ define i32 @potential_test13_caller3() { } define i1 @potential_test14(i1 %c0, i1 %c1, i1 %c2, i1 %c3) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test14 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X0:%.*]] = select i1 [[C0]], i32 0, i32 1 @@ -662,7 +662,7 @@ define i1 @potential_test14(i1 %c0, i1 %c1, i1 %c2, i1 %c3) { } define i1 @potential_test15(i1 %c0, i1 %c1) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test15 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X0:%.*]] = select i1 [[C0]], i32 0, i32 1 @@ -677,7 +677,7 @@ define i1 @potential_test15(i1 %c0, i1 %c1) { } define i1 @potential_test16(i1 %c0, i1 
%c1) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test16 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X1:%.*]] = select i1 [[C1]], i32 0, i32 1 @@ -691,12 +691,12 @@ define i1 @potential_test16(i1 %c0, i1 %c1) { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. ; TUNIT: [[RNG0]] = !{i32 0, i32 2} ; TUNIT: [[RNG1]] = !{i32 0, i32 3} diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index f94c986e9e46d..b9d0b6d0c99fb 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -5,7 +5,7 @@ ; FIXME: CGSCC is not looking at callees and calleers even though it could be allowed. 
define i32 @test0(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test0 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P]], align 4, !range [[RNG0:![0-9]+]] @@ -16,13 +16,13 @@ define i32 @test0(i32* %p) { } define i32 @test0-range-check(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test0-range-check ; TUNIT-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test0-range-check ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR5:[0-9]+]] @@ -269,7 +269,7 @@ define void @test0-icmp-check(i32* %p){ ret void } define i32 @test1(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD_10_100:%.*]] = load i32, i32* [[P]], align 4, !range 
[[RNG1:![0-9]+]] @@ -285,14 +285,14 @@ define i32 @test1(i32* %p) { define i1 @test1-check(i32* %p) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test1-check ; TUNIT-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]] ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500 ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test1-check ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR5]] @@ -317,7 +317,7 @@ define i1 @test1-check(i32* %p) { ; } define i32 @test2(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -334,7 +334,7 @@ entry: } define i32 @test2_check(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test2_check ; TUNIT-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -349,7 +349,7 @@ define i32 @test2_check(i32* %p) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = 
phi i32 [ 2, [[IF_THEN]] ], [ 3, [[IF_END]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test2_check ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -408,7 +408,7 @@ return: ; preds = %if.end, %if.then declare dso_local void @unkown() define internal i32 @r1(i32) local_unnamed_addr { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@r1 ; TUNIT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: br label [[TMP4:%.*]] @@ -427,7 +427,7 @@ define internal i32 @r1(i32) local_unnamed_addr { ; TUNIT-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 100 ; TUNIT-NEXT: br i1 [[TMP9]], label [[TMP1:%.*]], label [[TMP4]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@r1 ; CGSCC-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: br label [[TMP4:%.*]] @@ -467,7 +467,7 @@ f: define void @f1(i32){ ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: (i32 [[TMP0:%.*]]) { -; TUNIT-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR3]] ; TUNIT-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 15 ; TUNIT-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; TUNIT: 4: @@ -478,7 +478,7 @@ define void @f1(i32){ ; ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: (i32 [[TMP0:%.*]]) { -; CGSCC-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR5]] ; CGSCC-NEXT: [[TMP3:%.*]] = icmp sgt i32 
[[TMP2]], 15 ; CGSCC-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; CGSCC: 4: @@ -510,7 +510,7 @@ define void @f1(i32){ ; } ; } define dso_local i32 @test4-f1(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-f1 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -522,7 +522,7 @@ define dso_local i32 @test4-f1(i32 %u) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-f1 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -549,18 +549,18 @@ return: ; preds = %entry, %if.then define dso_local i32 @test4-g1(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-g1 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR4]] +; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-g1 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR6]] +; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[CALL]] ; ; FIXME: %call should have range [0, inf] @@ -579,7 +579,7 @@ entry: ; } ; } define dso_local i32 
@test4-f2(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-f2 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -594,7 +594,7 @@ define dso_local i32 @test4-f2(i32 %u) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-f2 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -627,18 +627,18 @@ return: ; preds = %if.else, %if.then define dso_local i32 @test4-g2(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-g2 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR4]], !range [[RNG3:![0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR3]], !range [[RNG3:![0-9]+]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-g2 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR6]] +; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[CALL]] ; entry: @@ -718,7 +718,7 @@ declare dso_local i32 @foo(i32) ; FIXME: All but the return is not needed anymore define dso_local zeroext i1 @phi(i32 %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@phi ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -750,7 +750,7 @@ define dso_local zeroext i1 @phi(i32 %arg) { ; TUNIT-NEXT: [[DOT0:%.*]] = phi i1 [ true, [[BB11]] ], [ false, [[BB12]] ] ; TUNIT-NEXT: ret i1 [[DOT0]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@phi ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: bb: @@ -822,7 +822,7 @@ bb13: ; preds = %bb12, %bb11 } define dso_local i1 @select(i32 %a) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@select ; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -834,7 +834,7 @@ define dso_local i1 @select(i32 %a) local_unnamed_addr #0 { ; TUNIT-NEXT: [[CMP6:%.*]] = icmp eq i32 [[Y_0]], 5 ; TUNIT-NEXT: ret i1 [[CMP6]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select ; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -857,7 +857,7 @@ entry: } define dso_local i32 @select_zext(i32 %a) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@select_zext ; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -870,7 +870,7 @@ define dso_local i32 @select_zext(i32 %a) local_unnamed_addr #0 { ; TUNIT-NEXT: [[DOT13:%.*]] = zext i1 [[CMP6]] to i32 ; 
TUNIT-NEXT: ret i32 [[DOT13]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select_zext ; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -895,7 +895,7 @@ entry: } define dso_local i64 @select_int2ptr_bitcast_ptr2int(i32 %a) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int ; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -910,7 +910,7 @@ define dso_local i64 @select_int2ptr_bitcast_ptr2int(i32 %a) local_unnamed_addr ; TUNIT-NEXT: [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64 ; TUNIT-NEXT: ret i64 [[P2I]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int ; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -941,14 +941,14 @@ entry: ; } define i1 @f_fcmp(float %a, float %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f_fcmp ; TUNIT-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = fcmp uge float [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f_fcmp ; CGSCC-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: 
[[R:%.*]] = fcmp uge float [[A]], [[B]] @@ -960,14 +960,14 @@ define i1 @f_fcmp(float %a, float %b) { ret i1 %s } define i1 @d_fcmp(double %a, double %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@d_fcmp ; TUNIT-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = fcmp oeq double [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@d_fcmp ; CGSCC-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = fcmp oeq double [[A]], [[B]] @@ -979,14 +979,14 @@ define i1 @d_fcmp(double %a, double %b) { ret i1 %s } define i1 @dp_icmp(double* %a, double* %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@dp_icmp ; TUNIT-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = icmp sge double* [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@dp_icmp ; CGSCC-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = icmp sge double* [[A]], [[B]] @@ -998,14 +998,14 @@ define i1 @dp_icmp(double* %a, double* %b) { ret i1 %s } define i1 @ip_icmp(i8* %a, i8* %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ip_icmp ; TUNIT-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = icmp ult i8* [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ip_icmp ; CGSCC-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = icmp ult i8* [[A]], [[B]] @@ -1017,25 +1017,25 @@ define i1 @ip_icmp(i8* %a, i8* %b) { ret i1 %s } define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dpa, double* %dpb, i8* %ipa, i8* %ipb) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fcmp_caller ; TUNIT-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR4]] -; TUNIT-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR4]] -; TUNIT-NEXT: [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR4]] -; TUNIT-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR4]] +; TUNIT-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR3]] +; TUNIT-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR3]] +; TUNIT-NEXT: [[R3:%.*]] = call i1 
@dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR3]] +; TUNIT-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR3]] ; TUNIT-NEXT: [[O1:%.*]] = or i1 [[R1]], [[R2]] ; TUNIT-NEXT: [[O2:%.*]] = or i1 [[R3]], [[R4]] ; TUNIT-NEXT: [[O3:%.*]] = or i1 [[O1]], [[O2]] ; TUNIT-NEXT: ret i1 [[O3]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fcmp_caller ; CGSCC-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR6]] -; CGSCC-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR6]] -; CGSCC-NEXT: [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR6]] -; CGSCC-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR6]] +; CGSCC-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR5]] +; CGSCC-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR5]] +; CGSCC-NEXT: [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR5]] +; CGSCC-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR5]] ; CGSCC-NEXT: [[O1:%.*]] = or i1 [[R1]], [[R2]] ; CGSCC-NEXT: [[O2:%.*]] = or i1 [[R3]], [[R4]] ; CGSCC-NEXT: [[O3:%.*]] = or i1 [[O1]], [[O2]] @@ -1052,12 +1052,12 @@ define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dp } define i8 @ret_two() { -; TUNIT: Function 
Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret_two ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 2 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret_two ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret i8 2 @@ -1065,12 +1065,12 @@ define i8 @ret_two() { ret i8 2 } define i8 @ret_undef() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret_undef ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret_undef ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret i8 undef @@ -1080,15 +1080,15 @@ define i8 @ret_undef() { ; Verify we collapse undef to a value and return something non-undef here. 
define i8 @undef_collapse_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@undef_collapse_1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_collapse_1 ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_undef() #[[ATTR6]] +; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_undef() #[[ATTR5]] ; CGSCC-NEXT: [[S:%.*]] = shl i8 [[C]], 2 ; CGSCC-NEXT: ret i8 [[S]] ; @@ -1099,15 +1099,15 @@ define i8 @undef_collapse_1() { ; Verify we collapse undef to a value and return something non-undef here. define i8 @undef_collapse_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@undef_collapse_2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_collapse_2 ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_two() #[[ATTR6]] +; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_two() #[[ATTR5]] ; CGSCC-NEXT: [[S:%.*]] = shl i8 undef, [[C]] ; CGSCC-NEXT: ret i8 [[S]] ; @@ -1118,16 +1118,16 @@ define i8 @undef_collapse_2() { define i8 @undef_collapse_caller() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@undef_collapse_caller ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: 
Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_collapse_caller ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C1:%.*]] = call i8 @undef_collapse_1() #[[ATTR6]] -; CGSCC-NEXT: [[C2:%.*]] = call i8 @undef_collapse_2() #[[ATTR6]] +; CGSCC-NEXT: [[C1:%.*]] = call i8 @undef_collapse_1() #[[ATTR5]] +; CGSCC-NEXT: [[C2:%.*]] = call i8 @undef_collapse_2() #[[ATTR5]] ; CGSCC-NEXT: [[A:%.*]] = add i8 [[C1]], [[C2]] ; CGSCC-NEXT: ret i8 [[A]] ; @@ -1138,13 +1138,13 @@ define i8 @undef_collapse_caller() { } define i32 @ret1or2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret1or2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[S:%.*]] = select i1 [[C]], i32 1, i32 2 ; TUNIT-NEXT: ret i32 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret1or2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[S:%.*]] = select i1 [[C]], i32 1, i32 2 @@ -1155,11 +1155,11 @@ define i32 @ret1or2(i1 %c) { } define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callee_range_1 ; TUNIT-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR4]] -; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR4]] +; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR3]] +; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR3]] ; TUNIT-NEXT: [[INDIRECTION:%.*]] = select i1 [[C3]], i32 [[R1]], i32 [[R2]] ; TUNIT-NEXT: [[A:%.*]] = add i32 [[R1]], 
[[INDIRECTION]] ; TUNIT-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 4 @@ -1167,11 +1167,11 @@ define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) { ; TUNIT-NEXT: [[F:%.*]] = and i1 [[I1]], [[I2]] ; TUNIT-NEXT: ret i1 [[F]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callee_range_1 ; CGSCC-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR6]] -; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR6]] +; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR5]] +; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR5]] ; CGSCC-NEXT: [[INDIRECTION:%.*]] = select i1 [[C3]], i32 [[R1]], i32 [[R2]] ; CGSCC-NEXT: [[A:%.*]] = add i32 [[R1]], [[INDIRECTION]] ; CGSCC-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 4 @@ -1191,22 +1191,22 @@ define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) { define i1 @callee_range_2(i1 %c1, i1 %c2) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callee_range_2 ; TUNIT-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR4]] -; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR4]] +; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR3]] +; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR3]] ; TUNIT-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; TUNIT-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 ; TUNIT-NEXT: [[I2:%.*]] = icmp sge i32 [[A]], 2 ; TUNIT-NEXT: [[F:%.*]] = and i1 [[I1]], [[I2]] ; TUNIT-NEXT: ret i1 [[F]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define 
{{[^@]+}}@callee_range_2 ; CGSCC-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR6]] -; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR6]] +; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR5]] +; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR5]] ; CGSCC-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; CGSCC-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 ; CGSCC-NEXT: [[I2:%.*]] = icmp sge i32 [[A]], 2 @@ -1224,12 +1224,12 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) { define i32 @ret100() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret100 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 100 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret100 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret i32 100 @@ -1239,7 +1239,7 @@ define i32 @ret100() { define i1 @ctx_adjustment(i32 %V) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ctx_adjustment ; TUNIT-SAME: (i32 [[V:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[C1:%.*]] = icmp sge i32 [[V]], 100 @@ -1253,7 +1253,7 @@ define i1 @ctx_adjustment(i32 %V) { ; TUNIT-NEXT: [[C2:%.*]] = icmp sge i32 [[PHI]], 100 ; TUNIT-NEXT: ret i1 [[C2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ctx_adjustment ; CGSCC-SAME: (i32 [[V:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[C1:%.*]] = icmp sge i32 [[V]], 100 @@ -1261,7 +1261,7 @@ define i1 @ctx_adjustment(i32 %V) { ; CGSCC: if.true: ; 
CGSCC-NEXT: br label [[END:%.*]] ; CGSCC: if.false: -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret100() #[[ATTR6]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret100() #[[ATTR5]] ; CGSCC-NEXT: br label [[END]] ; CGSCC: end: ; CGSCC-NEXT: [[PHI:%.*]] = phi i32 [ [[V]], [[IF_TRUE]] ], [ [[CALL]], [[IF_FALSE]] ] @@ -1283,13 +1283,13 @@ end: define i32 @func(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@func ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[RET:%.*]] = select i1 [[C]], i32 0, i32 1 ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@func ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[RET:%.*]] = select i1 [[C]], i32 0, i32 1 @@ -1300,28 +1300,28 @@ define i32 @func(i1 %c) { } define i32 @simplify_callsite_argument(i1 %d) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@simplify_callsite_argument ; TUNIT-SAME: (i1 [[D:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: [[RET1:%.*]] = call i32 @func(i1 noundef [[C]]) #[[ATTR4]] +; TUNIT-NEXT: [[RET1:%.*]] = call i32 @func(i1 noundef [[C]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32 [[RET1]] ; TUNIT: f: -; TUNIT-NEXT: [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR4]] +; TUNIT-NEXT: [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR3]] ; TUNIT-NEXT: ret i32 [[RET2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) 
; CGSCC-LABEL: define {{[^@]+}}@simplify_callsite_argument ; CGSCC-SAME: (i1 [[D:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: [[RET1:%.*]] = call noundef i32 @func(i1 noundef [[C]]) #[[ATTR6]] +; CGSCC-NEXT: [[RET1:%.*]] = call noundef i32 @func(i1 noundef [[C]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[RET1]] ; CGSCC: f: -; CGSCC-NEXT: [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR6]] +; CGSCC-NEXT: [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[RET2]] ; %c = select i1 %d, i1 true, i1 false @@ -1336,7 +1336,7 @@ f: define internal i32 @less_than_65536(i32 %arg) { ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_65536 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[SHRINKED:%.*]] = udiv i32 [[ARG]], 65536 @@ -1347,7 +1347,7 @@ define internal i32 @less_than_65536(i32 %arg) { } define internal i1 @is_less_than_65536(i32 %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_65536 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp ult i32 [[ARG]], 65536 @@ -1358,18 +1358,18 @@ define internal i1 @is_less_than_65536(i32 %arg) { } define i1 @check_divided_range(i32 %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@check_divided_range ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function 
Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@check_divided_range ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_65536(i32 noundef 0) #[[ATTR6]] -; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_65536(i32 [[ARG]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET1]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET2]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_65536(i32 noundef 0) #[[ATTR5]] +; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_65536(i32 [[ARG]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET1]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET2]]) #[[ATTR5]] ; CGSCC-NEXT: [[RET:%.*]] = and i1 [[TRUE1]], [[TRUE2]] ; CGSCC-NEXT: ret i1 [[RET]] ; @@ -1383,7 +1383,7 @@ define i1 @check_divided_range(i32 %arg) { define internal i32 @cast_and_return(i1 %c) { ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cast_and_return ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[RET:%.*]] = zext i1 [[C]] to i32 @@ -1394,7 +1394,7 @@ define internal i32 @cast_and_return(i1 %c) { } define internal i1 @is_less_than_3(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_3 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 3 @@ -1405,18 +1405,18 @@ define internal i1 @is_less_than_3(i32 %c) { } define i1 @check_casted_range(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@check_casted_range ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@check_casted_range ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @cast_and_return(i1 noundef true) #[[ATTR6]] -; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @cast_and_return(i1 [[C]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @cast_and_return(i1 noundef true) #[[ATTR5]] +; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @cast_and_return(i1 [[C]]) #[[ATTR5]] ; CGSCC-NEXT: [[ADD:%.*]] = add i32 [[CSRET1]], [[CSRET2]] -; CGSCC-NEXT: [[RET:%.*]] = call i1 @is_less_than_3(i32 [[ADD]]) #[[ATTR6]] +; CGSCC-NEXT: [[RET:%.*]] = call i1 @is_less_than_3(i32 [[ADD]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[RET]] ; %csret1 = call i32 @cast_and_return(i1 true) @@ -1427,7 +1427,7 @@ define i1 @check_casted_range(i1 %c) { } define internal i32 @less_than_100_1(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_100_1 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: switch i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -1482,7 +1482,7 @@ otherwise: } define internal i1 @is_less_than_100_1(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_100_1 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 100 @@ -1493,16 +1493,16 @@ define internal i1 @is_less_than_100_1(i32 %c) { } define i1 @propagate_range1(i32 %c){ -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@propagate_range1 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@propagate_range1 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET:%.*]] = call i32 @less_than_100_1(i32 [[C]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE:%.*]] = call i1 @is_less_than_100_1(i32 noundef [[CSRET]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET:%.*]] = call i32 @less_than_100_1(i32 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE:%.*]] = call i1 @is_less_than_100_1(i32 noundef [[CSRET]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[TRUE]] ; %csret = call i32 @less_than_100_1(i32 %c) @@ -1512,7 +1512,7 @@ define i1 @propagate_range1(i32 %c){ define internal i32 @less_than_100_2(i32 %c) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@less_than_100_2 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: switch i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -1541,7 +1541,7 @@ define internal i32 @less_than_100_2(i32 %c) { ; TUNIT: otherwise: ; TUNIT-NEXT: ret i32 99 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_100_2 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: switch i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -1597,13 +1597,13 @@ otherwise: define internal i1 @is_less_than_100_2(i32 %c) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define 
{{[^@]+}}@is_less_than_100_2 ; TUNIT-SAME: (i32 noundef [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 100 ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_100_2 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 100 @@ -1614,23 +1614,23 @@ define internal i1 @is_less_than_100_2(i32 %c) { } define i1 @propagate_range2(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@propagate_range2 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[CSRET1:%.*]] = call noundef i32 @less_than_100_2(i32 noundef 0) #[[ATTR4]] -; TUNIT-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR4]] -; TUNIT-NEXT: [[CSRET2:%.*]] = call noundef i32 @less_than_100_2(i32 [[C]]) #[[ATTR4]] -; TUNIT-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR4]] +; TUNIT-NEXT: [[CSRET1:%.*]] = call noundef i32 @less_than_100_2(i32 noundef 0) #[[ATTR3]] +; TUNIT-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR3]] +; TUNIT-NEXT: [[CSRET2:%.*]] = call noundef i32 @less_than_100_2(i32 [[C]]) #[[ATTR3]] +; TUNIT-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR3]] ; TUNIT-NEXT: [[TRUE:%.*]] = and i1 [[TRUE1]], [[TRUE2]] ; TUNIT-NEXT: ret i1 [[TRUE]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@propagate_range2 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_100_2(i32 noundef 0) #[[ATTR6]] -; 
CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR6]] -; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_100_2(i32 [[C]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_100_2(i32 noundef 0) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR5]] +; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_100_2(i32 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR5]] ; CGSCC-NEXT: [[TRUE:%.*]] = and i1 [[TRUE1]], [[TRUE2]] ; CGSCC-NEXT: ret i1 [[TRUE]] ; @@ -1643,13 +1643,13 @@ define i1 @propagate_range2(i32 %c) { } define internal i1 @non_zero(i8 %v) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@non_zero ; TUNIT-SAME: (i8 [[V:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = icmp ne i8 [[V]], 0 ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@non_zero ; CGSCC-SAME: (i8 [[V:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = icmp ne i8 [[V]], 0 @@ -1661,26 +1661,26 @@ define internal i1 @non_zero(i8 %v) { ; Avoid range metadata for %l below define i1 @context(i8* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@context ; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; TUNIT-NEXT: [[C:%.*]] = icmp slt i8 0, [[L]] ; TUNIT-NEXT: br i1 [[C]], 
label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR4]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR3]] ; TUNIT-NEXT: ret i1 [[R]] ; TUNIT: f: ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@context ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; CGSCC-NEXT: [[C:%.*]] = icmp slt i8 0, [[L]] ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; CGSCC: f: ; CGSCC-NEXT: ret i1 false @@ -1759,7 +1759,7 @@ bb3: ; preds = %bb2, %bb1 } define i1 @loop_1(i32 %N) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@loop_1 ; TUNIT-SAME: (i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -1774,7 +1774,7 @@ define i1 @loop_1(i32 %N) { ; TUNIT-NEXT: [[R:%.*]] = icmp sle i32 [[I]], 5 ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@loop_1 ; CGSCC-SAME: (i32 [[N:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -1810,19 +1810,17 @@ declare void @barney(i32 signext, i32 signext) !0 = !{i32 0, i32 10} !1 = !{i32 10, i32 100} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readnone } -; CGSCC: attributes #[[ATTR5]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR5]] = { willreturn } ;. 
; TUNIT: [[RNG0]] = !{i32 0, i32 10} ; TUNIT: [[RNG1]] = !{i32 10, i32 100} diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll index 4599dd2abb16d..e528007cada60 100644 --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -33,7 +33,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@external_ret2_nrw ; TUNIT-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -43,7 +43,7 @@ define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { ; TUNIT-NEXT: [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32* [[W0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@external_ret2_nrw ; CGSCC-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -62,7 +62,7 @@ entry: } define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_ret0_nw ; TUNIT-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -87,7 +87,7 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL5]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* 
[[RETVAL_0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_ret0_nw ; CGSCC-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -104,8 +104,8 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; CGSCC-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 
dereferenceable(4) [[W0]]) #[[ATTR4]] ; CGSCC-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: @@ -139,7 +139,7 @@ return: ; preds = %if.end, %if.then } define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_ret1_rrw ; TUNIT-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree align 4 [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -167,7 +167,7 @@ define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL8]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_ret1_rrw ; CGSCC-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree align 4 [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -187,8 +187,8 @@ define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) { ; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL5:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree 
nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; CGSCC-NEXT: [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; CGSCC-NEXT: [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4]] +; CGSCC-NEXT: [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4]] ; CGSCC-NEXT: [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: @@ -225,7 +225,7 @@ return: ; preds = %if.end, %if.then } define i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@external_sink_ret2_nrw ; CHECK-SAME: (i32* nofree [[N0:%.*]], i32* nocapture nofree readonly [[R0:%.*]], i32* nofree returned writeonly "no-capture-maybe-returned" [[W0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -257,7 +257,7 @@ return: ; preds = %if.end, 
%if.then } define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_ret1_rw ; TUNIT-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -279,7 +279,7 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL4]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* [[RETVAL_0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_ret1_rw ; CGSCC-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -294,7 +294,7 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { ; CGSCC-NEXT: store i32 [[TMP1]], i32* [[W0]], align 4 ; CGSCC-NEXT: [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) 
#[[ATTR4]] ; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: @@ -325,7 +325,7 @@ return: ; preds = %if.end, %if.then } define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@external_source_ret2_nrw ; TUNIT-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -333,11 +333,11 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { ; TUNIT-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32* [[W0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@external_source_ret2_nrw ; CGSCC-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly [[W0]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly [[W0]]) #[[ATTR5:[0-9]+]] ; CGSCC-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR3]] ; CGSCC-NEXT: ret i32* [[CALL1]] ; @@ -350,15 +350,16 @@ entry: ; Verify that we see only expected attribute sets, the above lines only check ; for a subset relation. ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind } +; TUNIT: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind } ; CGSCC: attributes #[[ATTR3]] = { nounwind } -; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR4]] = { nounwind memory(readwrite) } +; CGSCC: attributes #[[ATTR5]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll index 2ef52f86827d3..d3936a18a3983 100644 --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -26,7 +26,7 @@ define void @test1_2(i8* %x1_2, i8* %y1_2, i8* %z1_2) { } define i8* @test2(i8* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* @x, align 4 @@ -37,7 +37,7 @@ define i8* @test2(i8* %p) { } define i1 @test3(i8* %p, i8* %q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3 ; CHECK-SAME: (i8* nofree readnone [[P:%.*]], i8* nofree readnone [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = icmp ult i8* [[P]], [[Q]] @@ -50,10 +50,10 @@ define i1 @test3(i8* %p, i8* %q) { declare void @test4_1(i8* nocapture) readonly define void @test4_2(i8* %p) { -; CHECK: Function Attrs: readonly +; CHECK: Function Attrs: memory(read) ; CHECK-LABEL: define {{[^@]+}}@test4_2 ; CHECK-SAME: (i8* nocapture readonly [[P:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @test4_1(i8* nocapture readonly [[P]]) #[[ATTR2]] +; CHECK-NEXT: call void @test4_1(i8* nocapture readonly [[P]]) ; CHECK-NEXT: ret void ; call void @test4_1(i8* %p) @@ -62,7 +62,7 @@ define void @test4_2(i8* %p) { ; Missed optz'n: we could make %q readnone, but don't break test6! 
define void @test5(i8** %p, i8* %q) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test5 ; CHECK-SAME: (i8** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[P:%.*]], i8* nofree writeonly [[Q:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: store i8* [[Q]], i8** [[P]], align 8 @@ -88,7 +88,7 @@ define void @test6_2(i8** %p, i8* %q) { ; inalloca parameters are always considered written define void @test7_1(i32* inalloca(i32) %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test7_1 ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly inalloca(i32) dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret void @@ -97,7 +97,7 @@ define void @test7_1(i32* inalloca(i32) %a) { } define i32* @test8_1(i32* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test8_1 ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -108,14 +108,14 @@ entry: } define void @test8_2(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test8_2 ; TUNIT-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 10, i32* [[P]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define 
{{[^@]+}}@test8_2 ; CGSCC-SAME: (i32* nofree writeonly [[P:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -135,16 +135,16 @@ declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32, ; CHECK-NOT: readnone ; CHECK-NOT: readonly define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test9 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) #[[ATTR12:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test9 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR0]] { -; CGSCC-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) #[[ATTR13]] ; CGSCC-NEXT: ret void ; call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1>) @@ -154,16 +154,16 @@ define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) { ; CHECK: declare <4 x i32> @llvm.masked.gather declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) define <4 x i32> @test10(<4 x i32*> %ptrs) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@test10 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR6:[0-9]+]] { -; TUNIT-NEXT: 
[[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR13:[0-9]+]] +; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR12]] ; TUNIT-NEXT: ret <4 x i32> [[RES]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@test10 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR7:[0-9]+]] { -; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR15:[0-9]+]] +; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR13]] ; CGSCC-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1>, <4 x i32>undef) @@ -173,16 +173,16 @@ define <4 x i32> @test10(<4 x i32*> %ptrs) { ; CHECK: declare <4 x i32> @test11_1 declare <4 x i32> @test11_1(<4 x i32*>) argmemonly nounwind readonly define <4 x i32> @test11_2(<4 x i32*> %ptrs) { -; TUNIT: Function Attrs: argmemonly nounwind readonly +; TUNIT: Function Attrs: nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test11_2 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR7:[0-9]+]] { -; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR13:[0-9]+]] ; TUNIT-NEXT: ret <4 x i32> [[RES]] ; -; CGSCC: Function Attrs: argmemonly nounwind readonly +; CGSCC: Function Attrs: nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test11_2 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR8:[0-9]+]] { -; CGSCC-NEXT: [[RES:%.*]] = call <4 x 
i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR12:[0-9]+]] +; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR14:[0-9]+]] ; CGSCC-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @test11_1(<4 x i32*> %ptrs) @@ -192,16 +192,16 @@ define <4 x i32> @test11_2(<4 x i32*> %ptrs) { declare <4 x i32> @test12_1(<4 x i32*>) argmemonly nounwind ; CHECK-NOT: readnone define <4 x i32> @test12_2(<4 x i32*> %ptrs) { -; TUNIT: Function Attrs: argmemonly nounwind +; TUNIT: Function Attrs: nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test12_2 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR8:[0-9]+]] { -; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR14:[0-9]+]] +; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR13]] ; TUNIT-NEXT: ret <4 x i32> [[RES]] ; -; CGSCC: Function Attrs: argmemonly nounwind +; CGSCC: Function Attrs: nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test12_2 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR9:[0-9]+]] { -; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR16:[0-9]+]] +; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR14]] ; CGSCC-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @test12_1(<4 x i32*> %ptrs) @@ -209,13 +209,13 @@ define <4 x i32> @test12_2(<4 x i32*> %ptrs) { } define i32 @volatile_load(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@volatile_load ; TUNIT-SAME: (i32* nofree noundef align 4 [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[LOAD:%.*]] = load volatile i32, i32* [[P]], align 4 ; TUNIT-NEXT: ret i32 [[LOAD]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; 
CGSCC-LABEL: define {{[^@]+}}@volatile_load ; CGSCC-SAME: (i32* nofree noundef align 4 [[P:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[LOAD:%.*]] = load volatile i32, i32* [[P]], align 4 @@ -271,7 +271,7 @@ define void @unsound_readonly(i8* %ignored, i8* %escaped_then_written) { declare void @escape_i8(i8* %ptr) define void @byval_not_readonly_1(i8* byval(i8) %written) readonly { -; CHECK: Function Attrs: readonly +; CHECK: Function Attrs: memory(read) ; CHECK-LABEL: define {{[^@]+}}@byval_not_readonly_1 ; CHECK-SAME: (i8* noalias nonnull byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: call void @escape_i8(i8* nonnull dereferenceable(1) [[WRITTEN]]) @@ -282,7 +282,7 @@ define void @byval_not_readonly_1(i8* byval(i8) %written) readonly { } define void @byval_not_readonly_2(i8* byval(i8) %written) readonly { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@byval_not_readonly_2 ; CHECK-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: store i8 0, i8* [[WRITTEN]], align 1 @@ -293,13 +293,13 @@ define void @byval_not_readonly_2(i8* byval(i8) %written) readonly { } define void @byval_not_readnone_1(i8* byval(i8) %written) readnone { -; TUNIT: Function Attrs: readnone +; TUNIT: Function Attrs: memory(none) ; TUNIT-LABEL: define {{[^@]+}}@byval_not_readnone_1 ; TUNIT-SAME: (i8* noalias nonnull byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR10:[0-9]+]] { ; TUNIT-NEXT: call void @escape_i8(i8* nonnull dereferenceable(1) [[WRITTEN]]) ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: readnone +; CGSCC: Function Attrs: memory(none) ; CGSCC-LABEL: define {{[^@]+}}@byval_not_readnone_1 ; CGSCC-SAME: (i8* noalias nonnull byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: call void 
@escape_i8(i8* nonnull dereferenceable(1) [[WRITTEN]]) @@ -310,7 +310,7 @@ define void @byval_not_readnone_1(i8* byval(i8) %written) readnone { } define void @byval_not_readnone_2(i8* byval(i8) %written) readnone { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@byval_not_readnone_2 ; CHECK-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: store i8 0, i8* [[WRITTEN]], align 1 @@ -321,7 +321,7 @@ define void @byval_not_readnone_2(i8* byval(i8) %written) readnone { } define void @byval_no_fnarg(i8* byval(i8) %written) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@byval_no_fnarg ; CHECK-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: store i8 0, i8* [[WRITTEN]], align 1 @@ -334,16 +334,16 @@ define void @byval_no_fnarg(i8* byval(i8) %written) { define void @testbyval(i8* %read_only) { ; TUNIT-LABEL: define {{[^@]+}}@testbyval ; TUNIT-SAME: (i8* nocapture readonly [[READ_ONLY:%.*]]) { -; TUNIT-NEXT: call void @byval_not_readonly_1(i8* nocapture readonly byval(i8) [[READ_ONLY]]) #[[ATTR2]] +; TUNIT-NEXT: call void @byval_not_readonly_1(i8* nocapture readonly byval(i8) [[READ_ONLY]]) ; TUNIT-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture readnone byval(i8) [[READ_ONLY]]) -; TUNIT-NEXT: call void @byval_no_fnarg(i8* nocapture nofree readonly byval(i8) [[READ_ONLY]]) #[[ATTR15:[0-9]+]] +; TUNIT-NEXT: call void @byval_no_fnarg(i8* nocapture nofree readonly byval(i8) [[READ_ONLY]]) #[[ATTR13]] ; TUNIT-NEXT: ret void ; ; CGSCC-LABEL: define {{[^@]+}}@testbyval ; CGSCC-SAME: (i8* 
nocapture noundef nonnull readonly dereferenceable(1) [[READ_ONLY:%.*]]) { -; CGSCC-NEXT: call void @byval_not_readonly_1(i8* noalias nocapture noundef nonnull readonly byval(i8) dereferenceable(1) [[READ_ONLY]]) #[[ATTR2]] +; CGSCC-NEXT: call void @byval_not_readonly_1(i8* noalias nocapture noundef nonnull readonly byval(i8) dereferenceable(1) [[READ_ONLY]]) ; CGSCC-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture noundef nonnull readnone byval(i8) dereferenceable(1) [[READ_ONLY]]) -; CGSCC-NEXT: call void @byval_no_fnarg(i8* noalias nocapture nofree noundef nonnull readnone byval(i8) dereferenceable(1) [[READ_ONLY]]) #[[ATTR17:[0-9]+]] +; CGSCC-NEXT: call void @byval_no_fnarg(i8* noalias nocapture nofree noundef nonnull readnone byval(i8) dereferenceable(1) [[READ_ONLY]]) #[[ATTR14]] ; CGSCC-NEXT: ret void ; call void @byval_not_readonly_1(i8* byval(i8) %read_only) @@ -360,18 +360,18 @@ declare i8 @maybe_returned_val(i8* %ptr) readonly nounwind declare void @val_use(i8 %ptr) readonly nounwind define void @ptr_uses(i8* %ptr) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@ptr_uses -; TUNIT-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR11]] { -; TUNIT-NEXT: [[CALL_PTR:%.*]] = call i8* @maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR11]] -; TUNIT-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR11]] +; TUNIT-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR11:[0-9]+]] { +; TUNIT-NEXT: [[CALL_PTR:%.*]] = call i8* @maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR13]] +; TUNIT-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR13]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@ptr_uses -; CGSCC-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR12]] { -; CGSCC-NEXT: [[CALL_PTR:%.*]] = call i8* 
@maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR12]] -; CGSCC-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR12]] +; CGSCC-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { +; CGSCC-NEXT: [[CALL_PTR:%.*]] = call i8* @maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR14]] +; CGSCC-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR14]] ; CGSCC-NEXT: ret void ; %call_ptr = call i8* @maybe_returned_ptr(i8* %ptr) @@ -410,7 +410,7 @@ define void @ptr_use_chain(i8* %ptr) { @constant_mem = external dso_local constant i32, align 4 define i32 @read_only_constant_mem() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@read_only_constant_mem ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[L:%.*]] = load i32, i32* @constant_mem, align 4 @@ -420,39 +420,34 @@ define i32 @read_only_constant_mem() { ret i32 %l } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { readonly } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nounwind readonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nounwind } -; TUNIT: attributes #[[ATTR9]] = { argmemonly nofree norecurse nounwind willreturn } -; TUNIT: attributes #[[ATTR10]] = { readnone } -; TUNIT: attributes #[[ATTR11]] = { nounwind readonly } -; TUNIT: attributes #[[ATTR12]] = { willreturn writeonly } -; TUNIT: attributes #[[ATTR13]] = { readonly willreturn } -; TUNIT: attributes #[[ATTR14]] = { nounwind } -; TUNIT: attributes #[[ATTR15]] = { nounwind writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { memory(read) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR7]] = { nounwind memory(argmem: read) } +; TUNIT: attributes #[[ATTR8]] = { nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR9]] = { nofree 
norecurse nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR10]] = { memory(none) } +; TUNIT: attributes #[[ATTR11]] = { nounwind memory(read) } +; TUNIT: attributes #[[ATTR12]] = { willreturn } +; TUNIT: attributes #[[ATTR13]] = { nounwind } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR8]] = { argmemonly nounwind readonly } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nounwind } -; CGSCC: attributes #[[ATTR10]] = { argmemonly nofree norecurse nounwind willreturn } -; CGSCC: attributes #[[ATTR11]] = { readnone } -; CGSCC: attributes #[[ATTR12]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR13]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR14]] = { willreturn writeonly } -; CGSCC: attributes #[[ATTR15]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR16]] = { nounwind } -; CGSCC: attributes #[[ATTR17]] = { nounwind writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { memory(read) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn 
memory(write) } +; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR8]] = { nounwind memory(argmem: read) } +; CGSCC: attributes #[[ATTR9]] = { nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR10]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR11]] = { memory(none) } +; CGSCC: attributes #[[ATTR12]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR13]] = { willreturn } +; CGSCC: attributes #[[ATTR14]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll index 5e65e826f96cc..555ffe113c3d0 100644 --- a/llvm/test/Transforms/Attributor/returned.ll +++ b/llvm/test/Transforms/Attributor/returned.ll @@ -41,7 +41,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[_ZTI1Y:[a-zA-Z0-9_$"\\.-]+]] = external dso_local constant { i8*, i8*, i8* }, align 8 ;. 
define i32 @sink_r0(i32 %r) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@sink_r0 ; CHECK-SAME: (i32 returned [[R:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -52,14 +52,14 @@ entry: } define i32 @scc_r1(i32 %a, i32 %r, i32 %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc_r1 ; TUNIT-SAME: (i32 [[A:%.*]], i32 returned [[R:%.*]], i32 [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[A]], i32 [[R]]) #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc_r1 ; CGSCC-SAME: (i32 [[A:%.*]], i32 returned [[R:%.*]], i32 [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -73,7 +73,7 @@ entry: } define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc_r2 ; TUNIT-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 returned [[R:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -108,7 +108,7 @@ define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc_r2 ; CGSCC-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 returned 
[[R:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -188,7 +188,7 @@ return: ; preds = %cond.end, %if.then3 } define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc_rX ; TUNIT-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[R:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -223,7 +223,7 @@ define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[B]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc_rX ; CGSCC-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[R:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -325,7 +325,7 @@ return: ; preds = %cond.end, %if.then3 ; return a == b ? 
r : ptr_scc_r2(a, b, r); ; } define double* @ptr_sink_r0(double* %r) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ptr_sink_r0 ; CHECK-SAME: (double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -336,14 +336,14 @@ entry: } define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@ptr_scc_r1 ; TUNIT-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[R]], double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR10]] ; TUNIT-NEXT: ret double* [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@ptr_scc_r1 ; CGSCC-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -357,7 +357,7 @@ entry: } define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@ptr_scc_r2 ; TUNIT-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nocapture nofree readnone [[B:%.*]], double* 
nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -392,7 +392,7 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] ; TUNIT-NEXT: ret double* [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@ptr_scc_r2 ; CGSCC-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nocapture nofree readnone [[B:%.*]], double* nofree readnone returned [[R:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -479,18 +479,18 @@ return: ; preds = %cond.end, %if.then3 ; } ; define i32* @rt0(i32* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt0 ; TUNIT-SAME: (i32* nofree noundef nonnull readonly returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32* [[A]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt0 ; CGSCC-SAME: (i32* nofree noundef nonnull readonly returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull 
readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR9:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR7]] ; CGSCC-NEXT: ret i32* [[A]] ; entry: @@ -508,13 +508,13 @@ entry: ; } ; define i32* @rt1(i32* %a) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt1 ; TUNIT-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt1 ; CGSCC-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -531,14 +531,14 @@ entry: ; TEST another SCC test ; define i32* @rt2_helper(i32* %a) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt2_helper ; TUNIT-SAME: (i32* nofree readnone returned [[A:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt2(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32* [[A]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt2_helper ; CGSCC-SAME: (i32* nofree readnone returned [[A:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -551,7 +551,7 @@ entry: } define i32* @rt2(i32* %a, i32 *%b) #0 { -; 
TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt2 ; TUNIT-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -564,7 +564,7 @@ define i32* @rt2(i32* %a, i32 *%b) #0 { ; TUNIT-NEXT: [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[A]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* [[SEL]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt2 ; CGSCC-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -593,14 +593,14 @@ if.end: ; TEST another SCC test ; define i32* @rt3_helper(i32* %a, i32* %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt3_helper ; TUNIT-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt3(i32* noalias nocapture nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32* [[B]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt3_helper ; CGSCC-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -613,7 +613,7 @@ entry: } define i32* @rt3(i32* %a, i32 *%b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; 
TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt3 ; TUNIT-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -626,7 +626,7 @@ define i32* @rt3(i32* %a, i32 *%b) #0 { ; TUNIT-NEXT: [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* [[B]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt3 ; CGSCC-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -667,13 +667,13 @@ define i32* @calls_unknown_fn(i32* %r) #0 { ; TUNIT: Function Attrs: noinline nounwind uwtable ; TUNIT-LABEL: define {{[^@]+}}@calls_unknown_fn ; TUNIT-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR5:[0-9]+]] { -; TUNIT-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: ret i32* [[R]] ; ; CGSCC: Function Attrs: noinline nounwind uwtable ; CGSCC-LABEL: define {{[^@]+}}@calls_unknown_fn ; CGSCC-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR4:[0-9]+]] { -; CGSCC-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR10:[0-9]+]] +; CGSCC-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR8]] ; CGSCC-NEXT: ret i32* [[R]] ; tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn) @@ -716,14 +716,14 @@ define i32* @calls_maybe_redefined_fn(i32* %r) #0 { ; TUNIT-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn ; TUNIT-SAME: (i32* returned 
[[R:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR12]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[R]] ; ; CGSCC: Function Attrs: noinline nounwind uwtable ; CGSCC-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn ; CGSCC-SAME: (i32* returned [[R:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR8]] ; CGSCC-NEXT: ret i32* [[R]] ; entry: @@ -765,14 +765,14 @@ define i32* @calls_maybe_redefined_fn2(i32* %r) #0 { ; TUNIT-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn2 ; TUNIT-SAME: (i32* [[R:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR12]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[CALL]] ; ; CGSCC: Function Attrs: noinline nounwind uwtable ; CGSCC-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn2 ; CGSCC-SAME: (i32* [[R:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR8]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -791,7 +791,7 @@ entry: ; } ; define double @select_and_phi(double %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@select_and_phi ; CHECK-SAME: (double returned [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -827,7 +827,7 @@ if.end: ; preds = %if.then, %entry ; } ; define double @recursion_select_and_phi(i32 %a, double %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync 
nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@recursion_select_and_phi ; TUNIT-SAME: (i32 [[A:%.*]], double returned [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -840,7 +840,7 @@ define double @recursion_select_and_phi(i32 %a, double %b) #0 { ; TUNIT: if.end: ; TUNIT-NEXT: ret double [[B]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@recursion_select_and_phi ; CGSCC-SAME: (i32 [[A:%.*]], double returned [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -877,7 +877,7 @@ if.end: ; preds = %if.then, %entry ; } ; define double* @bitcast(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@bitcast ; CHECK-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -900,7 +900,7 @@ entry: ; } ; define double* @bitcasts_select_and_phi(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@bitcasts_select_and_phi ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -950,7 +950,7 @@ if.end: ; preds = %if.then, %entry ; } ; define double* @ret_arg_arg_undef(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_arg_arg_undef ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -998,7 
+998,7 @@ ret_undef: ; } ; define double* @ret_undef_arg_arg(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_undef_arg_arg ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1046,7 +1046,7 @@ ret_arg1: ; } ; define double* @ret_undef_arg_undef(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_undef_arg_undef ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1179,7 +1179,7 @@ r: ; TEST inconsistent IR in dead code. ; define i32 @deadblockcall1(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@deadblockcall1 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1197,7 +1197,7 @@ unreachableblock: declare i32 @deadblockcall_helper(i32 returned %A); define i32 @deadblockcall2(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@deadblockcall2 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1218,7 +1218,7 @@ unreachableblock2: } define i32 @deadblockphi1(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define 
{{[^@]+}}@deadblockphi1 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1244,7 +1244,7 @@ r: } define i32 @deadblockphi2(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@deadblockphi2 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1410,7 +1410,7 @@ define i32 @exact(i32* align 8 %a, i32* align 8 %b) { @G = external global i8 define i32* @ret_const() #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_const ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32* bitcast (i8* @G to i32*) @@ -1419,30 +1419,30 @@ define i32* @ret_const() #0 { ret i32* %bc } define i32* @use_const() #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@use_const ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32* bitcast (i8* @G to i32*) ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@use_const ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR11:[0-9]+]] +; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR9:[0-9]+]] ; CGSCC-NEXT: ret i32* [[C]] ; %c = call i32* @ret_const() ret i32* %c } define i32* @dont_use_const() #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable 
+; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@dont_use_const ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32* bitcast (i8* @G to i32*) ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@dont_use_const ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = musttail call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR11]] +; CGSCC-NEXT: [[C:%.*]] = musttail call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR9]] ; CGSCC-NEXT: ret i32* [[C]] ; %c = musttail call i32* @ret_const() @@ -1494,31 +1494,27 @@ declare dso_local i8* @__dynamic_cast(i8*, i8*, i8*, i64) attributes #0 = { noinline nounwind uwtable } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree noinline nosync nounwind readonly uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR3]] = { nofree noinline nosync nounwind memory(argmem: read) uwtable } +; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; TUNIT: attributes #[[ATTR5]] = { noinline nounwind uwtable } ; TUNIT: attributes #[[ATTR6]] = { noinline norecurse 
nounwind uwtable } ; TUNIT: attributes #[[ATTR7]] = { noreturn } ; TUNIT: attributes #[[ATTR8]] = { norecurse } -; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind readnone } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind readonly } -; TUNIT: attributes #[[ATTR12]] = { nounwind } -; TUNIT: attributes #[[ATTR13:[0-9]+]] = { nounwind readnone } +; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind } +; TUNIT: attributes #[[ATTR11]] = { nounwind } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree noinline nosync nounwind readonly uwtable } -; CGSCC: attributes #[[ATTR3]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR3]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; CGSCC: attributes #[[ATTR4]] = { noinline nounwind uwtable } ; CGSCC: attributes #[[ATTR5]] = { noreturn } -; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR7]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR8]] = { nounwind readnone } -; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind readonly } -; CGSCC: attributes #[[ATTR10]] = { nounwind } -; CGSCC: attributes #[[ATTR11]] = { readnone willreturn } +; CGSCC: 
attributes #[[ATTR6:[0-9]+]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR7]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR8]] = { nounwind } +; CGSCC: attributes #[[ATTR9]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll index a54b067acb0fa..23384c705debc 100644 --- a/llvm/test/Transforms/Attributor/undefined_behavior.ll +++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -11,7 +11,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; -- Load tests -- define void @load_wholly_unreachable() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@load_wholly_unreachable ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: unreachable @@ -21,7 +21,7 @@ define void @load_wholly_unreachable() { } define void @loads_wholly_unreachable() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@loads_wholly_unreachable ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: unreachable @@ -33,7 +33,7 @@ define void @loads_wholly_unreachable() { define void @load_single_bb_unreachable(i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@load_single_bb_unreachable ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -53,7 +53,7 @@ e: ; Note that while the load is removed (because it's unused), the block ; is not changed to unreachable define void @load_null_pointer_is_defined() null_pointer_is_valid { -; CHECK: Function Attrs: nofree 
norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@load_null_pointer_is_defined ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret void @@ -63,7 +63,7 @@ define void @load_null_pointer_is_defined() null_pointer_is_valid { } define internal i32* @ret_null() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret_null ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32* null @@ -72,12 +72,12 @@ define internal i32* @ret_null() { } define void @load_null_propagated() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@load_null_propagated ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@load_null_propagated ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: ret void @@ -90,7 +90,7 @@ define void @load_null_propagated() { ; -- Store tests -- define void @store_wholly_unreachable() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@store_wholly_unreachable ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: unreachable @@ -100,13 +100,13 @@ define void @store_wholly_unreachable() { } define void @store_wholly_unreachable_volatile() { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define 
{{[^@]+}}@store_wholly_unreachable_volatile ; TUNIT-SAME: () #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: store volatile i32 5, i32* null, align 4294967296 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@store_wholly_unreachable_volatile ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: store volatile i32 5, i32* null, align 4294967296 @@ -117,7 +117,7 @@ define void @store_wholly_unreachable_volatile() { } define void @store_single_bb_unreachable(i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@store_single_bb_unreachable ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -135,13 +135,13 @@ e: } define void @store_null_pointer_is_defined() null_pointer_is_valid { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@store_null_pointer_is_defined ; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: store i32 5, i32* null, align 4294967296 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@store_null_pointer_is_defined ; CGSCC-SAME: () #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: store i32 5, i32* null, align 4294967296 @@ -155,12 +155,12 @@ define void @store_null_propagated() { ; ATTRIBUTOR-LABEL: @store_null_propagated( ; ATTRIBUTOR-NEXT: unreachable ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@store_null_propagated ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@store_null_propagated ; CGSCC-SAME: () #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: [[PTR:%.*]] = call noalias align 4294967296 i32* @ret_null() #[[ATTR10:[0-9]+]] @@ -174,12 +174,12 @@ define void @store_null_propagated() { ; -- AtomicRMW tests -- define void @atomicrmw_wholly_unreachable() { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomicrmw_wholly_unreachable ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomicrmw_wholly_unreachable ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: unreachable @@ -189,7 +189,7 @@ define void @atomicrmw_wholly_unreachable() { } define void @atomicrmw_single_bb_unreachable(i1 %cond) { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomicrmw_single_bb_unreachable ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -198,7 +198,7 @@ define void @atomicrmw_single_bb_unreachable(i1 %cond) { ; TUNIT: e: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomicrmw_single_bb_unreachable ; CGSCC-SAME: (i1 
[[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -236,7 +236,7 @@ define void @atomicrmw_null_propagated() { ; ATTRIBUTOR-LABEL: @atomicrmw_null_propagated( ; ATTRIBUTOR-NEXT: unreachable ; -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomicrmw_null_propagated ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable @@ -256,12 +256,12 @@ define void @atomicrmw_null_propagated() { ; -- AtomicCmpXchg tests -- define void @atomiccmpxchg_wholly_unreachable() { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomiccmpxchg_wholly_unreachable ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomiccmpxchg_wholly_unreachable ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: unreachable @@ -271,7 +271,7 @@ define void @atomiccmpxchg_wholly_unreachable() { } define void @atomiccmpxchg_single_bb_unreachable(i1 %cond) { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomiccmpxchg_single_bb_unreachable ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -280,7 +280,7 @@ define void @atomiccmpxchg_single_bb_unreachable(i1 %cond) { ; TUNIT: e: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomiccmpxchg_single_bb_unreachable ; CGSCC-SAME: (i1 
[[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -318,7 +318,7 @@ define void @atomiccmpxchg_null_propagated() { ; ATTRIBUTOR-LABEL: @atomiccmpxchg_null_propagated( ; ATTRIBUTOR-NEXT: unreachable ; -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomiccmpxchg_null_propagated ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable @@ -340,7 +340,7 @@ define void @atomiccmpxchg_null_propagated() { ; Note: The unreachable on %t and %e is _not_ from AAUndefinedBehavior define i32 @cond_br_on_undef() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef ; TUNIT-SAME: () #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: unreachable @@ -349,7 +349,7 @@ define i32 @cond_br_on_undef() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef ; CGSCC-SAME: () #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: unreachable @@ -369,7 +369,7 @@ e: ; Valid branch - verify that this is not converted ; to unreachable. 
define void @cond_br_on_undef2(i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cond_br_on_undef2 ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[COND]], label [[T1:%.*]], label [[E1:%.*]] @@ -394,7 +394,7 @@ e1: } define i1 @ret_undef() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ret_undef ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i1 undef @@ -403,7 +403,7 @@ define i1 @ret_undef() { } define void @cond_br_on_undef_interproc() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: unreachable @@ -412,7 +412,7 @@ define void @cond_br_on_undef_interproc() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[COND:%.*]] = call i1 @ret_undef() #[[ATTR10]] @@ -431,7 +431,7 @@ e: } define i1 @ret_undef2() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ret_undef2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: br i1 true, label [[T:%.*]], label [[E:%.*]] @@ -449,7 +449,7 @@ e: ; More complicated interproc deduction of undef define void @cond_br_on_undef_interproc2() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc2 ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: unreachable @@ -458,7 +458,7 @@ define void @cond_br_on_undef_interproc2() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc2 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[COND:%.*]] = call i1 @ret_undef2() #[[ATTR10]] @@ -479,7 +479,7 @@ e: ; Branch on undef that depends on propagation of ; undef of a previous instruction. define i32 @cond_br_on_undef3() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cond_br_on_undef3 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 1, undef @@ -500,7 +500,7 @@ e: ; Branch on undef because of uninitialized value. ; FIXME: Currently it doesn't propagate the undef. 
define i32 @cond_br_on_undef_uninit() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef_uninit ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: unreachable @@ -509,7 +509,7 @@ define i32 @cond_br_on_undef_uninit() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef_uninit ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: unreachable @@ -533,7 +533,7 @@ e: ; MODULE-NOT: @callee( define internal i32 @callee(i1 %C, i32* %A) { ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callee ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -555,12 +555,12 @@ F: } define i32 @foo() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[X:%.*]] = call noundef i32 @callee() #[[ATTR10]] @@ -575,13 +575,13 @@ define i32 @foo() { ; Tests for argument position define void @arg_nonnull_1(i32* nonnull %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_1 ; 
TUNIT-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: store i32 0, i32* [[A]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_1 ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: store i32 0, i32* [[A]], align 4 @@ -592,13 +592,13 @@ define void @arg_nonnull_1(i32* nonnull %a) { } define void @arg_nonnull_1_noundef_1(i32* nonnull noundef %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_1_noundef_1 ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: store i32 0, i32* [[A]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_1_noundef_1 ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: store i32 0, i32* [[A]], align 4 @@ -609,7 +609,7 @@ define void @arg_nonnull_1_noundef_1(i32* nonnull noundef %a) { } define void @arg_nonnull_12(i32* nonnull %a, i32* nonnull %b, i32* %c) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_12 ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly 
[[A:%.*]], i32* nocapture nofree nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -623,7 +623,7 @@ define void @arg_nonnull_12(i32* nonnull %a, i32* nonnull %b, i32* %c) { ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_12 ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -650,7 +650,7 @@ ret: } define void @arg_nonnull_12_noundef_2(i32* nonnull %a, i32* noundef nonnull %b, i32* %c) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_12_noundef_2 ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree noundef nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -664,7 +664,7 @@ define void @arg_nonnull_12_noundef_2(i32* nonnull %a, i32* noundef nonnull %b, ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_12_noundef_2 ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree noundef nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -692,12 +692,12 @@ ret: ; Pass null directly to argument with nonnull attribute 
define void @arg_nonnull_violation1_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -707,12 +707,12 @@ define void @arg_nonnull_violation1_1() { } define void @arg_nonnull_violation1_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation1_2 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation1_2 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -723,12 +723,12 @@ define void @arg_nonnull_violation1_2() { ; A case that depends on value simplification define void @arg_nonnull_violation2_1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -740,12 +740,12 @@ define void @arg_nonnull_violation2_1(i1 %c) { } 
define void @arg_nonnull_violation2_2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation2_2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation2_2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -758,7 +758,7 @@ define void @arg_nonnull_violation2_2(i1 %c) { ; Cases for single and multiple violation at a callsite define void @arg_nonnull_violation3_1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation3_1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -772,7 +772,7 @@ define void @arg_nonnull_violation3_1(i1 %c) { ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation3_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -805,7 +805,7 @@ ret: } define void @arg_nonnull_violation3_2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation3_2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -819,7 +819,7 @@ define void @arg_nonnull_violation3_2(i1 %c) { ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree 
nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation3_2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -854,7 +854,7 @@ ret: ; Tests for returned position define nonnull i32* @returned_nonnnull(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_nonnnull ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch i32 [[C]], label [[ONDEFAULT:%.*]] [ @@ -881,7 +881,7 @@ ondefault: } define noundef i32* @returned_noundef(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_noundef ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch i32 [[C]], label [[ONDEFAULT:%.*]] [ @@ -908,7 +908,7 @@ ondefault: } define nonnull noundef i32* @returned_nonnnull_noundef(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_nonnnull_noundef ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch i32 [[C]], label [[ONDEFAULT:%.*]] [ @@ -935,7 +935,7 @@ ondefault: } define noundef i32 @returned_nonnnull_noundef_int() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_nonnnull_noundef_int ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32 0 @@ -965,7 +965,7 @@ define void @callsite_noundef_2() { } define i32 @argument_noundef1(i32 noundef %c) { -; CHECK: Function Attrs: nofree norecurse 
nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@argument_noundef1 ; CHECK-SAME: (i32 noundef returned [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i32 [[C]] @@ -974,12 +974,12 @@ define i32 @argument_noundef1(i32 noundef %c) { } define i32 @violate_noundef_nonpointer() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@violate_noundef_nonpointer ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@violate_noundef_nonpointer ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -989,7 +989,7 @@ define i32 @violate_noundef_nonpointer() { } define i32* @argument_noundef2(i32* noundef %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@argument_noundef2 ; CHECK-SAME: (i32* nofree noundef readnone returned "no-capture-maybe-returned" [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i32* [[C]] @@ -998,12 +998,12 @@ define i32* @argument_noundef2(i32* noundef %c) { } define i32* @violate_noundef_pointer() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@violate_noundef_pointer ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@violate_noundef_pointer ; CGSCC-SAME: () #[[ATTR2]] { ; 
CGSCC-NEXT: ret i32* undef @@ -1013,7 +1013,7 @@ define i32* @violate_noundef_pointer() { } define internal noundef i32 @assumed_undef_is_ok(i1 %c, i32 %arg) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@assumed_undef_is_ok ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: br i1 [[C]], label [[REC:%.*]], label [[RET:%.*]] @@ -1035,12 +1035,12 @@ ret: } define noundef i32 @assumed_undef_is_ok_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@assumed_undef_is_ok_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@assumed_undef_is_ok_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @assumed_undef_is_ok(i1 [[C]]) #[[ATTR10]] @@ -1051,25 +1051,25 @@ define noundef i32 @assumed_undef_is_ok_caller(i1 %c) { } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) } ; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nounwind null_pointer_is_valid willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly } -; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn memory(write) } ; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nounwind null_pointer_is_valid willreturn } ; CGSCC: attributes #[[ATTR7]] = { nofree nounwind willreturn } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { willreturn } +; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-assume.ll b/llvm/test/Transforms/Attributor/value-simplify-assume.ll index 260626f383437..a44a5f618d4c8 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-assume.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-assume.ll @@ -7,7 +7,7 @@ declare void @useI1p(i1*) declare void @unknown() define i1 @readI1p(i1* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@readI1p ; CHECK-SAME: (i1* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[L:%.*]] = load i1, i1* [[P]], align 1 @@ -37,13 +37,13 @@ define i1 @keep_assume_1c_nr() norecurse { } define i1 @drop_assume_1c_nr() norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1c_nr ; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR4:[0-9]+]] ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1c_nr ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR5:[0-9]+]] @@ -156,7 +156,7 @@ define i1 @keep_assume_1_nr(i1 %arg) norecurse { } define i1 @drop_assume_1_nr(i1 %arg) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1_nr ; TUNIT-SAME: 
(i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -164,7 +164,7 @@ define i1 @drop_assume_1_nr(i1 %arg) norecurse { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1_nr ; CGSCC-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -261,7 +261,7 @@ define i1 @keep_assume_4_nr(i1 %arg) norecurse { } define i1 @assume_1_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1_nr ; TUNIT-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -275,7 +275,7 @@ define i1 @assume_1_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT: m: ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1_nr ; CGSCC-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -305,7 +305,7 @@ m: } define void @assume_1b_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1b_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ 
-320,7 +320,7 @@ define void @assume_1b_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT: m: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1b_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -351,7 +351,7 @@ m: } define i1 @assume_2_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -368,7 +368,7 @@ define i1 @assume_2_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[L]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -401,7 +401,7 @@ m: } define void @assume_2b_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2b_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -418,7 +418,7 @@ define void @assume_2b_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret 
void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2b_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -451,7 +451,7 @@ m: } define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_3_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -469,7 +469,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5:[0-9]+]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_3_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -484,7 +484,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -504,7 +504,7 @@ m: } define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: 
inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_4_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -522,7 +522,7 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_4_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -537,7 +537,7 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -557,7 +557,7 @@ m: } define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -581,7 +581,7 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* 
noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -602,7 +602,7 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -628,7 +628,7 @@ m: } define i1 @assume_5c_nr(i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5c_nr ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -652,7 +652,7 @@ define i1 @assume_5c_nr(i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5c_nr ; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -673,7 +673,7 @@ 
define i1 @assume_5c_nr(i1 %cond) norecurse { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -717,13 +717,13 @@ define i1 @keep_assume_1c() { } define i1 @drop_assume_1c() { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1c ; TUNIT-SAME: () #[[ATTR3]] { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR4]] ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1c ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR5]] @@ -825,7 +825,7 @@ define i1 @keep_assume_1(i1 %arg) { } define i1 @drop_assume_1(i1 %arg) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1 ; TUNIT-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -833,7 +833,7 @@ define i1 @drop_assume_1(i1 %arg) { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1 ; CGSCC-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -925,7 +925,7 @@ define i1 @keep_assume_4(i1 %arg) { } define i1 @assume_1(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1 ; TUNIT-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -939,7 +939,7 @@ define i1 @assume_1(i1 %arg, i1 %cond) { ; TUNIT: m: ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1 ; CGSCC-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -969,7 +969,7 @@ m: } define void @assume_1b(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1b ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -984,7 +984,7 @@ define void @assume_1b(i1 %arg, i1 %cond) { ; TUNIT: m: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1b ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca 
i1, align 1 @@ -1015,7 +1015,7 @@ m: } define i1 @assume_2(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1032,7 +1032,7 @@ define i1 @assume_2(i1 %arg, i1 %cond) { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[L]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1065,7 +1065,7 @@ m: } define void @assume_2b(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2b ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1082,7 +1082,7 @@ define void @assume_2b(i1 %arg, i1 %cond) { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2b ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1115,7 +1115,7 @@ m: } define i1 @assume_3(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_3 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1133,7 +1133,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_3 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1148,7 +1148,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1168,7 +1168,7 @@ m: } define i1 @assume_4(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_4 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1186,7 +1186,7 @@ define i1 @assume_4(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: 
inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_4 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1201,7 +1201,7 @@ define i1 @assume_4(i1 %arg, i1 %cond) { ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1221,7 +1221,7 @@ m: } define i1 @assume_5(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1245,7 +1245,7 @@ define i1 @assume_5(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1266,7 +1266,7 @@ define i1 @assume_5(i1 %arg, i1 %cond) { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; 
CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1292,7 +1292,7 @@ m: } define i1 @assume_5c(i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5c ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1316,7 +1316,7 @@ define i1 @assume_5c(i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5c ; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1337,7 +1337,7 @@ define i1 @assume_5c(i1 %cond) { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1363,18 +1363,17 @@ m: } ;. 
-; TUNIT: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ; TUNIT: attributes #[[ATTR2]] = { norecurse } -; TUNIT: attributes #[[ATTR3]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; TUNIT: attributes #[[ATTR4]] = { willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ; CGSCC: attributes #[[ATTR2]] = { norecurse } -; CGSCC: attributes #[[ATTR3]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { inaccessiblememonly nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CGSCC: attributes #[[ATTR5]] = { willreturn } -; CGSCC: attributes #[[ATTR6]] = { readonly willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-dbg.ll b/llvm/test/Transforms/Attributor/value-simplify-dbg.ll index 10a84d6458c67..f1e887d7fe83e 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-dbg.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-dbg.ll @@ -24,7 +24,7 @@ entry: declare void @use(i32 noundef) define void @src() norecurse !dbg !22 { -; CHECK: Function Attrs: norecurse nosync writeonly +; CHECK: Function Attrs: norecurse nosync memory(write) ; CHECK-LABEL: define {{[^@]+}}@src ; CHECK-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG22:![0-9]+]] { ; CHECK-NEXT: entry: @@ -73,8 +73,8 @@ declare i32 @speculatable() speculatable readnone !24 = !DILocation(line: 10, column: 7, scope: !22) !25 = !DILocation(line: 11, column: 1, scope: !22) ;. -; CHECK: attributes #[[ATTR0]] = { norecurse nosync writeonly } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { readnone speculatable } +; CHECK: attributes #[[ATTR0]] = { norecurse nosync memory(write) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { speculatable memory(none) } ;. 
; CHECK: [[DBG0]] = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) ; CHECK: [[META1:![0-9]+]] = distinct !DIGlobalVariable(name: "G", scope: !2, file: !5, line: 1, type: !6, isLocal: true, isDefinition: true) diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll index a2e4c3114e317..7ae99e48ff8e6 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -50,7 +50,7 @@ define internal void @level1Kernel(i32 %C) { ; TUNIT-NEXT: call void @level2Kernelb() #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: -; TUNIT-NEXT: call void @level2Kernelall_late() #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: call void @level2Kernelall_late() #[[ATTR3]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: norecurse nosync nounwind @@ -67,7 +67,7 @@ define internal void @level1Kernel(i32 %C) { ; CGSCC-NEXT: call void @level2Kernelb() #[[ATTR4]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: -; CGSCC-NEXT: call void @level2Kernelall_late() #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: call void @level2Kernelall_late() #[[ATTR4]] ; CGSCC-NEXT: ret void ; entry: @@ -89,7 +89,7 @@ if.end: ; preds = %if.else, %if.then } define internal void @level2Kernelall_early() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_early ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: @@ -110,7 +110,7 @@ define internal void @level2Kernela() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: call void @use(i32 noundef 
[[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5:[0-9]+]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -138,7 +138,7 @@ define internal void @level2Kernelb() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -160,13 +160,13 @@ entry: } define internal void @level2Kernelall_late() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@level2Kernelall_late ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@level2Kernelall_late ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -217,7 +217,7 @@ define internal void @level1(i32 %C) { ; TUNIT-NEXT: call void @level2b() #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: -; TUNIT-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] +; TUNIT-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: norecurse nosync nounwind @@ -235,7 +235,7 @@ define internal void @level1(i32 %C) { ; CGSCC-NEXT: call void @level2b(i32* noalias nocapture nofree noundef nonnull readonly align 4 
dereferenceable(4) [[LOCAL]]) #[[ATTR4]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: -; CGSCC-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR6]] +; CGSCC-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] ; CGSCC-NEXT: ret void ; entry: @@ -258,14 +258,14 @@ if.end: ; preds = %if.else, %if.then } define internal void @level2all_early(i32* %addr) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@level2all_early ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@level2all_early ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -286,7 +286,7 @@ define internal void @level2a(i32* %addr) { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -314,7 +314,7 @@ define internal void @level2b(i32* %addr) { ; 
TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -336,14 +336,14 @@ entry: } define internal void @level2all_late(i32* %addr) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@level2all_late ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@level2all_late ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -362,17 +362,15 @@ declare dso_local void @use(i32, i32, i32) nosync norecurse nounwind ;. 
; TUNIT: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; TUNIT: attributes #[[ATTR1]] = { norecurse nosync nounwind } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(write) } ; TUNIT: attributes #[[ATTR3]] = { nosync nounwind } -; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5]] = { nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR6]] = { nounwind } +; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR5]] = { nounwind } ;. ; CGSCC: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; CGSCC: attributes #[[ATTR1]] = { norecurse nosync nounwind } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(write) } ; CGSCC: attributes #[[ATTR3]] = { nosync nounwind } ; CGSCC: attributes #[[ATTR4]] = { nounwind } -; CGSCC: attributes #[[ATTR5]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR6]] = { nounwind writeonly } +; CGSCC: attributes #[[ATTR5]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-instances.ll b/llvm/test/Transforms/Attributor/value-simplify-instances.ll index 9de4fd59ec581..270673ba23c79 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-instances.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-instances.ll @@ -13,7 +13,7 @@ declare i1* @geti1Ptr() ; CHECK: @[[G3:[a-zA-Z0-9_$"\\.-]+]] = private global i1 undef ;. 
define internal i1 @recursive_inst_comparator(i1* %a, i1* %b) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@recursive_inst_comparator ; CHECK-SAME: (i1* noalias nofree readnone [[A:%.*]], i1* noalias nofree readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[B]] @@ -103,17 +103,29 @@ define i1 @recursive_inst_compare_caller(i1 %c) { ; Make sure we do *not* return true. define internal i1 @recursive_alloca_compare(i1 %c, i1* %p) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@recursive_alloca_compare -; CHECK-SAME: (i1 [[C:%.*]], i1* noalias nofree nonnull readnone [[P:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[A:%.*]] = alloca i1, align 1 -; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; CHECK: t: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[P]] -; CHECK-NEXT: ret i1 [[CMP]] -; CHECK: f: -; CHECK-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef true, i1* noalias nofree noundef nonnull readnone dereferenceable(1) [[A]]) #[[ATTR1]] -; CHECK-NEXT: ret i1 [[CALL]] +; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_compare +; TUNIT-SAME: (i1 [[C:%.*]], i1* noalias nofree nonnull readnone [[P:%.*]]) #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT: [[A:%.*]] = alloca i1, align 1 +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: t: +; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[P]] +; TUNIT-NEXT: ret i1 [[CMP]] +; TUNIT: f: +; TUNIT-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef true, i1* noalias nofree noundef nonnull readnone dereferenceable(1) [[A]]) #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: ret i1 [[CALL]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define 
{{[^@]+}}@recursive_alloca_compare +; CGSCC-SAME: (i1 [[C:%.*]], i1* noalias nofree nonnull readnone [[P:%.*]]) #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: [[A:%.*]] = alloca i1, align 1 +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: t: +; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[P]] +; CGSCC-NEXT: ret i1 [[CMP]] +; CGSCC: f: +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef true, i1* noalias nofree noundef nonnull readnone dereferenceable(1) [[A]]) #[[ATTR3:[0-9]+]] +; CGSCC-NEXT: ret i1 [[CALL]] ; %a = alloca i1 br i1 %c, label %t, label %f @@ -127,13 +139,13 @@ f: ; FIXME: This should *not* return true. define i1 @recursive_alloca_compare_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { -; TUNIT-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 [[C]], i1* undef) #[[ATTR1]] +; TUNIT-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 [[C]], i1* undef) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 [[C]], i1* undef) #[[ATTR4:[0-9]+]] @@ -145,7 +157,7 @@ define i1 @recursive_alloca_compare_caller(i1 %c) { ; Make sure we do *not* simplify this to return 0 or 1, return 42 is ok though. 
define internal i8 @recursive_alloca_load_return(i1 %c, i8* %p, i8 %v) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_load_return ; TUNIT-SAME: (i1 [[C:%.*]], i8* nocapture nofree nonnull readonly [[P:%.*]], i8 noundef [[V:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -156,10 +168,10 @@ define internal i8 @recursive_alloca_load_return(i1 %c, i8* %p, i8 %v) { ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; TUNIT-NEXT: ret i8 [[L]] ; TUNIT: f: -; TUNIT-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR4]] ; TUNIT-NEXT: ret i8 [[CALL]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_load_return ; CGSCC-SAME: (i1 [[C:%.*]], i8* nocapture nofree nonnull readonly [[P:%.*]], i8 noundef [[V:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -170,7 +182,7 @@ define internal i8 @recursive_alloca_load_return(i1 %c, i8* %p, i8 %v) { ; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; CGSCC-NEXT: ret i8 [[L]] ; CGSCC: f: -; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR3:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR3]] ; CGSCC-NEXT: ret i8 
[[CALL]] ; %a = alloca i8 @@ -186,16 +198,16 @@ f: } define i8 @recursive_alloca_load_return_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_load_return_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 [[C]], i8* undef, i8 noundef 42) #[[ATTR4]] ; TUNIT-NEXT: ret i8 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_load_return_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 [[C]], i8* undef, i8 noundef 42) #[[ATTR5:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 [[C]], i8* undef, i8 noundef 42) #[[ATTR4]] ; CGSCC-NEXT: ret i8 [[CALL]] ; %call = call i8 @recursive_alloca_load_return(i1 %c, i8* undef, i8 42) @@ -259,7 +271,7 @@ define i1 @recursive_alloca_compare_caller_global1(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller_global1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global1(i1 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global1(i1 [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_alloca_compare_global1(i1 %c) @@ -318,7 +330,7 @@ define i1 @recursive_alloca_compare_caller_global2(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller_global2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global2(i1 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 
@recursive_alloca_compare_global2(i1 [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_alloca_compare_global2(i1 %c) @@ -374,25 +386,24 @@ define i1 @recursive_inst_compare_caller_global3(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_inst_compare_caller_global3 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_inst_compare_global3(i1 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_inst_compare_global3(i1 [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_inst_compare_global3(i1 %c) ret i1 %call } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree nosync nounwind } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind } -; TUNIT: attributes #[[ATTR6]] = { nounwind readnone } +; TUNIT: attributes #[[ATTR6]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind } -; CGSCC: attributes #[[ATTR4]] = { nounwind readnone } -; CGSCC: attributes #[[ATTR5]] = { nounwind } +; CGSCC: attributes #[[ATTR4]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll index 51e7729ba54ea..dd4e2f68bb36f 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll @@ -13,7 +13,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 %struct2 = type <{ ptr, i64, i64, i32, [4 x i8] }> define i64 @t1(ptr %first, ptr %first.addr, ptr %0) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t1 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FIRST:%.*]], ptr nocapture nofree readnone [[FIRST_ADDR:%.*]], ptr nocapture nofree readnone [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -24,7 +24,7 @@ define i64 @t1(ptr %first, ptr %first.addr, ptr %0) { ; TUNIT-NEXT: [[CALL:%.*]] = call ptr @foo.4(ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FIRST]]) #[[ATTR3:[0-9]+]] ; TUNIT-NEXT: ret i64 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; 
CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@t1 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FIRST:%.*]], ptr nocapture nofree readnone [[FIRST_ADDR:%.*]], ptr nocapture nofree readnone [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -49,7 +49,7 @@ if.end: ; preds = %entry } define internal ptr @foo.4(ptr %__first) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@foo.4 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[__FIRST:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -57,7 +57,7 @@ define internal ptr @foo.4(ptr %__first) { ; TUNIT-NEXT: store ptr [[__FIRST]], ptr [[__FIRST]], align 8 ; TUNIT-NEXT: ret ptr undef ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@foo.4 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[__FIRST:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -75,7 +75,7 @@ entry: } define internal ptr @bar(ptr %QQfirst) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[QQFIRST:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -101,7 +101,7 @@ while.end: ; preds = %while.cond } define ptr @t2(ptr %this, ptr %this.addr, ptr %this1) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: 
readwrite) ; TUNIT-LABEL: define {{[^@]+}}@t2 ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -110,12 +110,12 @@ define ptr @t2(ptr %this, ptr %this.addr, ptr %this1) { ; TUNIT-NEXT: [[TEST_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; TUNIT-NEXT: ret ptr [[TEST_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@t2 ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @foo.1(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @foo.1(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] ; CGSCC-NEXT: [[TEST_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; CGSCC-NEXT: ret ptr [[TEST_RET]] ; @@ -128,23 +128,23 @@ entry: } define internal %S @foo.1(ptr %foo.this) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@foo.1 ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[FOO_THIS:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 -; TUNIT-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) 
#[[ATTR5:[0-9]+]] +; TUNIT-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR4]] ; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; TUNIT-NEXT: ret [[S]] [[FOO_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@foo.1 ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[FOO_THIS:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 -; CGSCC-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]] +; CGSCC-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR8:[0-9]+]] ; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[FOO_RET]] ; @@ -157,20 +157,20 @@ entry: } define internal void @bar.2(ptr %bar.this, ptr %bar.data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@bar.2 ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store ptr [[BAR_DATA]], ptr [[BAR_THIS]], align 8 -; TUNIT-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 
dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR5]] +; TUNIT-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@bar.2 ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[BAR_DATA]], ptr [[BAR_THIS]], align 8 -; CGSCC-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR6]] +; CGSCC-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR8]] ; CGSCC-NEXT: ret void ; entry: @@ -180,14 +180,14 @@ entry: } define internal void @baz(ptr %baz.this, ptr %baz.data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@baz ; TUNIT-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAZ_THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAZ_DATA:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store ptr [[BAZ_DATA]], ptr [[BAZ_THIS]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn 
writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@baz ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAZ_THIS:%.*]], ptr nofree writeonly [[BAZ_DATA:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -200,7 +200,7 @@ entry: } define ptr @foo(ptr %this, ptr %this.addr, ptr %this1) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -209,12 +209,12 @@ define ptr @foo(ptr %this, ptr %this.addr, ptr %this1) { ; TUNIT-NEXT: [[FOO_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; TUNIT-NEXT: ret ptr [[FOO_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @bar.5(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] +; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @bar.5(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] ; CGSCC-NEXT: [[FOO_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; CGSCC-NEXT: ret ptr [[FOO_RET]] ; @@ -227,7 +227,7 @@ entry: } define internal %S @bar.5(ptr %this) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@bar.5 ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -237,13 +237,13 @@ define internal %S @bar.5(ptr %this) { ; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; TUNIT-NEXT: ret [[S]] [[BAR_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@bar.5 ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: call void @baz.6(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] +; CGSCC-NEXT: call void @baz.6(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]] ; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[BAR_RET]] ; @@ -257,7 +257,7 @@ entry: } define internal void @baz.6(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@baz.6 ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -265,12 +265,12 @@ define internal void @baz.6(ptr %this, ptr %data) { ; TUNIT-NEXT: call void @boom(ptr nocapture nofree noundef nonnull 
writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@baz.6 ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[DATA]], ptr [[THIS]], align 8 -; CGSCC-NEXT: call void @boom(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA]]) #[[ATTR8]] +; CGSCC-NEXT: call void @boom(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA]]) #[[ATTR9]] ; CGSCC-NEXT: ret void ; entry: @@ -280,7 +280,7 @@ entry: } define internal void @boom(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@boom ; TUNIT-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -290,7 +290,7 @@ define internal void @boom(ptr %this, ptr %data) { ; TUNIT-NEXT: store ptr [[V]], ptr [[THIS]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@boom ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree 
[[DATA:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -371,7 +371,7 @@ declare void @ext1(ptr) ; Taken from https://github.com/llvm/llvm-project/issues/54981 define dso_local void @spam() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@spam ; TUNIT-SAME: () #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -402,7 +402,7 @@ define dso_local void @spam() { ; TUNIT: bb35: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@spam ; CGSCC-SAME: () #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -475,22 +475,22 @@ bb35: ; preds = %bb16 } define double @t4(ptr %this, ptr %this.addr, ptr %this1) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[THIS_ADDR1:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; TUNIT-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR5]] +; TUNIT-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]] ; TUNIT-NEXT: ret double 0.000000e+00 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@t4 ; CGSCC-SAME: (ptr nofree noundef nonnull 
align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[THIS_ADDR1:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] +; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] ; CGSCC-NEXT: [[TMP0:%.*]] = extractvalue [[S]] [[CALL]], 0 ; CGSCC-NEXT: ret double 0.000000e+00 ; @@ -504,24 +504,24 @@ entry: } define internal %S @t4a(ptr %this) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4a ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; TUNIT-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]]) #[[ATTR5]] +; TUNIT-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]]) #[[ATTR4]] ; TUNIT-NEXT: ret [[S]] undef ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@t4a ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; 
CGSCC-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] +; CGSCC-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] ; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[TMP0]] ; @@ -539,23 +539,23 @@ entry: } define internal void @t4b(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4b ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 -; TUNIT-NEXT: call void @t4c(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR5]] +; TUNIT-NEXT: call void @t4c(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@t4b ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[DATA]], ptr [[THIS]], align 8 -; CGSCC-NEXT: call void 
@t4c(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[DATA]]) #[[ATTR6]] +; CGSCC-NEXT: call void @t4c(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[DATA]]) #[[ATTR8]] ; CGSCC-NEXT: ret void ; entry: @@ -570,7 +570,7 @@ entry: } define internal void @t4c(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4c ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -578,7 +578,7 @@ define internal void @t4c(ptr %this, ptr %data) { ; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@t4c ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree writeonly [[DATA:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: entry: @@ -611,22 +611,22 @@ entry: !6 = !{i32 7, !"Dwarf Version", i32 2} !7 = !{i32 2, !"Debug Info Version", i32 3} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind readnone } -; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind readnone } -; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR8]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse 
nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR7]] = { willreturn } +; CGSCC: attributes #[[ATTR8]] = { nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR9]] = { nounwind willreturn memory(readwrite) } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]} ; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll index 28940d7328f3c..537dc304cb797 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll @@ -36,7 +36,7 @@ declare void @harmless_use(ptr nocapture readonly) nofree norecurse nosync nounw ; CHECK: @[[GLOBALS:[a-zA-Z0-9_$"\\.-]+]] = internal constant [[STRUCT_S:%.*]] { i32 42, double 3.140000e+00, ptr null, i32 0 }, align 8 ;. 
define i32 @testOneFieldGlobalS(i32 %cmpx) { -; CHECK: Function Attrs: nofree norecurse nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@testOneFieldGlobalS ; CHECK-SAME: (i32 [[CMPX:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -95,7 +95,7 @@ if.end7: ; preds = %if.then5, %if.end4 } define i32 @testOneFieldGlobalS_type_mismatch() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@testOneFieldGlobalS_type_mismatch ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: @@ -152,7 +152,7 @@ if.end7: ; preds = %if.then5, %if.end4 } define i32 @testOneFieldGlobalS_byte_offset_wrong() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@testOneFieldGlobalS_byte_offset_wrong ; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -210,7 +210,7 @@ if.end7: ; preds = %if.then5, %if.end4 ret i32 %r.2 } ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind readnone willreturn } -; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index cd992da4c9f98..55e314d310d09 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -57,7 +57,7 @@ ; CHECK: @[[GLOBAL:[a-zA-Z0-9_$"\\.-]+]] = internal global [[STRUCT_STY:%.*]] zeroinitializer, align 8 ;. define void @write_arg(i32* %p, i32 %v) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@write_arg ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -103,7 +103,7 @@ declare i32 @random(...) ; return r; ; } define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) align 4 %agg.result) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1 ; TUNIT-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -135,7 +135,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; TUNIT-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR15]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1 ; CGSCC-SAME: (%struct.S* noalias nocapture nofree nonnull 
writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -538,7 +538,7 @@ for.end38: ; preds = %for.cond.cleanup30 ; } ; define i32 @local_alloca_simplifiable_3() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_3 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -563,7 +563,7 @@ split: ; } ; define i32 @local_alloca_simplifiable_4() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_4 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret i32 undef @@ -725,7 +725,7 @@ cond.end: ; preds = %cond.false, %cond.t ; } ; define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) align 4 %agg.result) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_1 ; TUNIT-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -832,7 +832,7 @@ entry: ; } ; define void @static_global_simplifiable_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_2 ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: entry: @@ -1075,13 +1075,13 @@ for.end35: ; preds = %for.cond.cleanup27 ; return Flag3; ; } define i32 @static_global_simplifiable_3() { -; TUNIT: Function Attrs: nofree norecurse 
nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_3 ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 1, i32* @Flag3, align 4, !tbaa [[TBAA3]] ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_3 ; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: store i32 1, i32* @Flag3, align 4, !tbaa [[TBAA3]] @@ -1110,7 +1110,7 @@ define i32 @static_global_simplifiable_3() { ; } ; define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align 4 %agg.result, %struct.S* byval(%struct.S) align 8 %s) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1 ; TUNIT-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]], %struct.S* noalias nocapture nofree nonnull byval([[STRUCT_S]]) align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -1160,7 +1160,7 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align ; TUNIT-NEXT: store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1 ; CGSCC-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]], %struct.S* noalias nocapture nofree nonnull byval([[STRUCT_S]]) align 8 
dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -1683,7 +1683,7 @@ join: ; preds = %right, %left ; We could simplify these if we separate accessed bins wrt. alignment (here mod 4). define i32 @unknown_access_mixed_simplifiable(i32 %arg1, i32 %arg2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_simplifiable ; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1721,7 +1721,7 @@ entry: ; The access to bc4b could go anywhere, nothing is simplifiable. define i32 @unknown_access_mixed_not_simplifiable(i32 %arg1, i32 %arg2, i32 %arg3) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_not_simplifiable ; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1777,14 +1777,14 @@ declare void @escape(i8*) ; } ; define i32 @global_not_simplifiable_1(i32 %cnd) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@global_not_simplifiable_1 ; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[I:%.*]] = load i32, i32* @Flag0, align 4, !tbaa [[TBAA3]] ; TUNIT-NEXT: ret i32 [[I]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@global_not_simplifiable_1 ; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -1891,13 +1891,13 @@ entry: ret i32 %i } define void @static_global_not_simplifiable_2_helper() { -; 
TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 2, i32* @Flag4, align 4, !tbaa [[TBAA3]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 2, i32* @Flag4, align 4, !tbaa [[TBAA3]] @@ -1964,13 +1964,13 @@ define i32 @write_read_global() { ret i32 %l } define void @write_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 7, i32* @Gint2, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, i32* @Gint2, align 4 @@ -1980,13 +1980,13 @@ define void @write_global() { ret void } define i32 @read_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@read_global ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* @Gint2, align 4 ; TUNIT-NEXT: ret i32 [[L]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define 
{{[^@]+}}@read_global ; CGSCC-SAME: () #[[ATTR7]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* @Gint2, align 4 @@ -1996,12 +1996,12 @@ define i32 @read_global() { ret i32 %l } define i32 @write_read_static_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_read_static_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: ret i32 7 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_read_static_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: ret i32 7 @@ -2011,13 +2011,13 @@ define i32 @write_read_static_global() { ret i32 %l } define void @write_static_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_static_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 7, i32* @Gstatic_int2, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_static_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, i32* @Gstatic_int2, align 4 @@ -2027,13 +2027,13 @@ define void @write_static_global() { ret void } define i32 @read_static_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@read_static_global ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4 ; TUNIT-NEXT: ret i32 [[L]] ; -; CGSCC: Function Attrs: nofree norecurse 
nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@read_static_global ; CGSCC-SAME: () #[[ATTR7]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4 @@ -2043,12 +2043,12 @@ define i32 @read_static_global() { ret i32 %l } define i32 @write_read_static_undef_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_read_static_undef_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: ret i32 7 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_read_static_undef_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: ret i32 7 @@ -2058,12 +2058,12 @@ define i32 @write_read_static_undef_global() { ret i32 %l } define void @write_static_undef_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_static_undef_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_static_undef_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, i32* @Gstatic_undef_int2, align 4 @@ -2073,7 +2073,7 @@ define void @write_static_undef_global() { ret void } define i32 @read_static_undef_global() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@read_static_undef_global 
; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret i32 7 @@ -2083,7 +2083,7 @@ define i32 @read_static_undef_global() { } define i32 @single_read_of_static_global() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@single_read_of_static_global ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret i32 0 @@ -2383,7 +2383,7 @@ define i64 @cast_and_load_2() { define void @recursive_load_store(i64 %N, i32 %v) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_load_store ; TUNIT-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -2398,7 +2398,7 @@ define void @recursive_load_store(i64 %N, i32 %v) { ; TUNIT: for.end: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_load_store ; CGSCC-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2857,7 +2857,7 @@ entry: ; Make sure the access %1 is not forwarded to the loads %2 and %3 as the indices are ; varying and the accesses thus not "exact". This used to simplify %cmp12 to true. 
define hidden void @no_propagation_of_unknown_index_access(i32* %in, i32* %out, i32 %idx) #0 { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access ; TUNIT-SAME: (i32* nocapture nofree readonly [[IN:%.*]], i32* nocapture nofree writeonly [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -2900,7 +2900,7 @@ define hidden void @no_propagation_of_unknown_index_access(i32* %in, i32* %out, ; TUNIT-NEXT: [[INC16]] = add nsw i32 [[I3_0]], 1 ; TUNIT-NEXT: br label [[FOR_COND4]], !llvm.loop [[TBAA12]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access ; CGSCC-SAME: (i32* nocapture nofree readonly [[IN:%.*]], i32* nocapture nofree writeonly [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR13:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2992,7 +2992,7 @@ for.body7: ; preds = %for.cond4 ; Ensure we do not return true. 
define internal i1 @alloca_non_unique(i32* %p, i32 %in, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique ; TUNIT-SAME: (i32* nocapture nofree nonnull readonly align 4 [[P:%.*]], i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR12:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -3006,7 +3006,7 @@ define internal i1 @alloca_non_unique(i32* %p, i32 %in, i1 %c) { ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]] ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique ; CGSCC-SAME: (i32* nocapture nofree nonnull readonly align 4 [[P:%.*]], i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR14:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -3034,13 +3034,13 @@ f: ; Ensure we do not return true. 
define i1 @alloca_non_unique_caller(i32 %in, i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique_caller ; TUNIT-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR13:[0-9]+]] { ; TUNIT-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR20]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique_caller ; CGSCC-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR15:[0-9]+]] { ; CGSCC-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR22]] @@ -3052,22 +3052,22 @@ define i1 @alloca_non_unique_caller(i32 %in, i1 %c) { ; Ensure we do not return %bad or %l, but %sel define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal ; TUNIT-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: store i32 [[BAD]], i32* [[A]], align 4 -; TUNIT-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR21:[0-9]+]] +; TUNIT-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR16]] ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 ; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[BAD]], i32 [[L]] ; TUNIT-NEXT: ret i32 [[SEL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: 
define {{[^@]+}}@scope_value_traversal ; CGSCC-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR16:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 [[BAD]], i32* [[A]], align 4 -; CGSCC-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR24:[0-9]+]] +; CGSCC-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR19]] ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 ; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[BAD]], i32 [[L]] ; CGSCC-NEXT: ret i32 [[SEL]] @@ -3081,7 +3081,7 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) { } define void @scope_value_traversal_helper(i32* %a, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal_helper ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 @@ -3089,7 +3089,7 @@ define void @scope_value_traversal_helper(i32* %a, i1 %c) { ; TUNIT-NEXT: store i32 [[SEL]], i32* [[A]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@scope_value_traversal_helper ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR13]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 @@ -3139,54 +3139,52 @@ define void @scope_value_traversal_helper(i32* %a, i1 %c) { !30 = distinct !{!30, !17} !31 = distinct !{!31, !17} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind memory(write) } ; TUNIT: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } ; TUNIT: attributes #[[ATTR9:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } ; TUNIT: attributes #[[ATTR10:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } ; TUNIT: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR12]] = { argmemonly nofree nosync nounwind } -; TUNIT: attributes #[[ATTR13]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: 
attributes #[[ATTR14:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR13]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR14:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; TUNIT: attributes #[[ATTR15]] = { willreturn } -; TUNIT: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR17]] = { nocallback } ; TUNIT: attributes #[[ATTR18]] = { norecurse } ; TUNIT: attributes #[[ATTR19]] = { nounwind } ; TUNIT: attributes #[[ATTR20]] = { nofree nosync nounwind } -; TUNIT: attributes #[[ATTR21]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: 
attributes #[[ATTR8]] = { nofree norecurse nosync nounwind writeonly } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind memory(write) } ; CGSCC: attributes #[[ATTR9:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } ; CGSCC: attributes #[[ATTR10:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } ; CGSCC: attributes #[[ATTR11:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } ; CGSCC: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR13]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR14]] = { argmemonly nofree nosync nounwind } -; CGSCC: attributes #[[ATTR15]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR16]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR17:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR13]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR15]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR17:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CGSCC: attributes #[[ATTR18]] = { willreturn } -; CGSCC: attributes #[[ATTR19]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR19]] = { nounwind willreturn } ; CGSCC: attributes #[[ATTR20]] = { nocallback } ; CGSCC: attributes #[[ATTR21]] = { norecurse } ; CGSCC: attributes #[[ATTR22]] = { nounwind } ; CGSCC: attributes #[[ATTR23]] = { 
nofree nosync nounwind } -; CGSCC: attributes #[[ATTR24]] = { nounwind willreturn } ;. ; TUNIT: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1} diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 7c79c4e3d059c..490b5bb7e876c 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -15,7 +15,7 @@ declare i8* @llvm.call.preallocated.arg(token, i32) ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (void (i8***)* @f1 to i8*), i8* bitcast (void (i1 (i8*)*)* @f2 to i8*)] } ;. define internal i32 addrspace(3)* @const_ptr_return_as3() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@const_ptr_return_as3 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: ret i32 addrspace(3)* @ConstAS3Ptr @@ -23,7 +23,7 @@ define internal i32 addrspace(3)* @const_ptr_return_as3() { ret i32 addrspace(3)* @ConstAS3Ptr } define internal i32* @const_ptr_return() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@const_ptr_return ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i32* addrspacecast (i32 addrspace(3)* @ConstAS3Ptr to i32*) @@ -52,7 +52,7 @@ define void @test1_helper() { ; TEST 2 : Simplify return value define i32 @return0() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return0 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i32 0 @@ -61,7 +61,7 @@ define i32 @return0() { } define i32 @return1() { -; CHECK: 
Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return1 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i32 1 @@ -70,7 +70,7 @@ define i32 @return1() { } define i32 @test2_1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test2_1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -83,7 +83,7 @@ define i32 @test2_1(i1 %c) { ; TUNIT-NEXT: [[RET:%.*]] = phi i32 [ [[RET0]], [[IF_TRUE]] ], [ 1, [[IF_FALSE]] ] ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test2_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -116,12 +116,12 @@ end: define i32 @test2_2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test2_2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test2_2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[RET:%.*]] = tail call noundef i32 @test2_1(i1 [[C]]) #[[ATTR12]] @@ -229,7 +229,7 @@ end: } define i32 @ipccp1(i32 %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ipccp1 ; CHECK-SAME: 
(i32 returned [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: br i1 true, label [[T:%.*]], label [[F:%.*]] @@ -247,7 +247,7 @@ f: } define internal i1 @ipccp2i(i1 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2i ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -265,12 +265,12 @@ f: } define i1 @ipccp2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = call noundef i1 @ipccp2i() #[[ATTR12]] @@ -281,7 +281,7 @@ define i1 @ipccp2() { } define internal i1 @ipccp2ib(i1 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2ib ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -299,12 +299,12 @@ f: } define i1 @ipccp2b() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp2b ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2b ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = call noundef i1 @ipccp2ib() #[[ATTR12]] @@ -315,7 +315,7 @@ define i1 @ipccp2b() { } define 
internal i32 @ipccp3i(i32 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp3i ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -334,12 +334,12 @@ f: } define i32 @ipccp3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp3 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 7 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp3 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = call noundef i32 @ipccp3i() #[[ATTR12]] @@ -350,7 +350,7 @@ define i32 @ipccp3() { } define internal i32 @ipccp4ia(i1 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp4ia ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -366,7 +366,7 @@ f: ret i32 1 } define internal i32 @ipccp4ib(i32 %a) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp4ib ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -386,7 +386,7 @@ f: } define i32 @ipccp4(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp4 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -395,7 +395,7 @@ define i32 @ipccp4(i1 %c) { ; 
TUNIT: f: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp4 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -417,7 +417,7 @@ f: ; Do not touch complicated arguments (for now) %struct.X = type { i8* } define internal i32* @test_inalloca(i32* inalloca(i32) %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test_inalloca ; CHECK-SAME: (i32* noalias nofree nonnull returned writeonly inalloca(i32) dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i32* [[A]] @@ -425,13 +425,13 @@ define internal i32* @test_inalloca(i32* inalloca(i32) %a) { ret i32* %a } define i32* @complicated_args_inalloca(i32* %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_inalloca ; TUNIT-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[CALL:%.*]] = call nonnull dereferenceable(4) i32* @test_inalloca(i32* noalias nofree writeonly inalloca(i32) "no-capture-maybe-returned" [[ARG]]) #[[ATTR9:[0-9]+]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_inalloca ; CGSCC-SAME: (i32* nofree noundef nonnull readnone dereferenceable(4) [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call noalias nonnull dereferenceable(4) i32* @test_inalloca(i32* noalias nofree noundef nonnull writeonly inalloca(i32) dereferenceable(4) 
[[ARG]]) #[[ATTR12]] @@ -442,7 +442,7 @@ define i32* @complicated_args_inalloca(i32* %arg) { } define internal i32* @test_preallocated(i32* preallocated(i32) %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test_preallocated ; CHECK-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 4294967296 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i32* [[A]] @@ -460,7 +460,8 @@ define i32* @complicated_args_preallocated() { ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_preallocated ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { -; CGSCC-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR13:[0-9]+]] +; CGSCC-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR12]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 4294967296 null) #[[ATTR13:[0-9]+]] [ "preallocated"(token [[C]]) ] ; CGSCC-NEXT: ret i32* null ; %c = call token @llvm.call.preallocated.setup(i32 1) @@ -470,13 +471,13 @@ define i32* @complicated_args_preallocated() { define internal void @test_sret(%struct.X* sret(%struct.X) %a, %struct.X** %b) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test_sret ; TUNIT-SAME: (%struct.X* noalias nofree noundef nonnull writeonly sret([[STRUCT_X:%.*]]) align 4294967296 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 ; TUNIT-NEXT: ret void 
; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test_sret ; CGSCC-SAME: (%struct.X* noalias nofree noundef nonnull writeonly sret([[STRUCT_X:%.*]]) align 4294967296 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 @@ -489,13 +490,13 @@ define internal void @test_sret(%struct.X* sret(%struct.X) %a, %struct.X** %b) { define void @complicated_args_sret(%struct.X** %b) { ; ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_sret ; TUNIT-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 4294967296 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 4294967296 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR9]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_sret ; CGSCC-SAME: (%struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: unreachable @@ -505,7 +506,7 @@ define void @complicated_args_sret(%struct.X** %b) { } define internal %struct.X* @test_nest(%struct.X* nest %a) { -; CGSCC: Function Attrs: nofree 
norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_nest ; CGSCC-SAME: (%struct.X* nest noalias nocapture nofree readnone align 4294967296 [[A:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: ret %struct.X* null @@ -513,12 +514,12 @@ define internal %struct.X* @test_nest(%struct.X* nest %a) { ret %struct.X* %a } define %struct.X* @complicated_args_nest() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_nest ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret %struct.X* null ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_nest ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call noalias noundef align 4294967296 %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 4294967296 null) #[[ATTR12]] @@ -530,7 +531,7 @@ define %struct.X* @complicated_args_nest() { @S = external global %struct.X define internal void @test_byval(%struct.X* byval(%struct.X) %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test_byval ; TUNIT-SAME: (i8* [[TMP0:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]], align 8 @@ -540,7 +541,7 @@ define internal void @test_byval(%struct.X* byval(%struct.X) %a) { ; TUNIT-NEXT: store i8* null, i8** [[G0]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; 
CGSCC-LABEL: define {{[^@]+}}@test_byval ; CGSCC-SAME: (i8* [[TMP0:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]], align 8 @@ -555,19 +556,19 @@ define internal void @test_byval(%struct.X* byval(%struct.X) %a) { ret void } define void @complicated_args_byval() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_byval ; TUNIT-SAME: () #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: [[S_CAST:%.*]] = bitcast %struct.X* @S to i8** ; TUNIT-NEXT: [[TMP1:%.*]] = load i8*, i8** [[S_CAST]], align 8 -; TUNIT-NEXT: call void @test_byval(i8* [[TMP1]]) #[[ATTR11]] +; TUNIT-NEXT: call void @test_byval(i8* [[TMP1]]) #[[ATTR9]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_byval ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: [[TMP1:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT_X:%.*]], %struct.X* @S, i32 0, i32 0), align 8 -; CGSCC-NEXT: call void @test_byval(i8* nofree writeonly [[TMP1]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: call void @test_byval(i8* nofree writeonly [[TMP1]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; call void @test_byval(%struct.X* byval(%struct.X) @S) @@ -610,7 +611,7 @@ define i8* @complicated_args_byval2() { } define void @fixpoint_changed(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@fixpoint_changed ; TUNIT-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: entry: @@ -633,7 +634,7 @@ define void @fixpoint_changed(i32* %p) { ; TUNIT: for.end: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree 
norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@fixpoint_changed ; CGSCC-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: @@ -684,12 +685,12 @@ for.end: ; Check we merge undef and a constant properly. define i8 @caller0() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller0 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller0 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -699,12 +700,12 @@ define i8 @caller0() { ret i8 %c } define i8 @caller1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller1 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -714,12 +715,12 @@ define i8 @caller1() { ret i8 %c } define i8 @caller2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller2 ; CGSCC-SAME: () #[[ATTR2]] { 
; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -729,12 +730,12 @@ define i8 @caller2() { ret i8 %c } define i8 @caller_middle() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller_middle ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller_middle ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -744,12 +745,12 @@ define i8 @caller_middle() { ret i8 %c } define i8 @caller3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller3 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller3 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -759,12 +760,12 @@ define i8 @caller3() { ret i8 %c } define i8 @caller4() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller4 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller4 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -774,7 +775,7 @@ define i8 @caller4() { ret i8 %c } define 
internal i8 @callee(i8 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callee ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i8 49 @@ -784,13 +785,13 @@ define internal i8 @callee(i8 %a) { } define void @user_as3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@user_as3 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: store i32 0, i32 addrspace(3)* @ConstAS3Ptr, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@user_as3 ; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call fastcc align 4 i32 addrspace(3)* @const_ptr_return_as3() #[[ATTR12]] @@ -802,13 +803,13 @@ define void @user_as3() { ret void } define void @user() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@user ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: store i32 0, i32* addrspacecast (i32 addrspace(3)* @ConstAS3Ptr to i32*), align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@user ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[CALL:%.*]] = call fastcc align 4 i32* @const_ptr_return() #[[ATTR12]] @@ -822,12 +823,12 @@ define void @user() { define i1 @test_merge_with_undef_values_ptr(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn 
memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R1:%.*]] = call noundef i1 @undef_then_null(i1 [[C]]) #[[ATTR12]] @@ -837,7 +838,7 @@ define i1 @test_merge_with_undef_values_ptr(i1 %c) { ret i1 %r1 } define internal i1 @undef_then_null(i1 %c, i32* %i32Aptr, i32* %i32Bptr) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_then_null ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -858,12 +859,12 @@ b: } define i1 @test_merge_with_undef_values(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_merge_with_undef_values ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_merge_with_undef_values ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R1:%.*]] = call noundef i1 @undef_then_1(i1 [[C]]) #[[ATTR12]] @@ -874,7 +875,7 @@ define i1 @test_merge_with_undef_values(i1 %c) { } define internal i1 @undef_then_1(i1 %c, i32 %i32A, i32 %i32B) { ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_then_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; 
CGSCC-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -895,12 +896,12 @@ b: } define i32 @test_select(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_select ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i32 42 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_select ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call noundef i32 @select() #[[ATTR12]] @@ -911,7 +912,7 @@ define i32 @test_select(i32 %c) { } define internal i32 @select(i1 %a, i32 %b, i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i32 42 @@ -921,7 +922,7 @@ define internal i32 @select(i1 %a, i32 %b, i32 %c) { } define i1 @icmp() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@icmp ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i1 true @@ -973,14 +974,14 @@ define internal void @unknown_calle_arg_is_undef(void (i32)* %fn, i32 %arg) { @g = internal constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (void (i8***)* @f1 to i8*), i8* bitcast (void (i1 (i8*)*)* @f2 to i8*)] } define internal void @f1(i8*** %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: (i8*** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) 
#[[ATTR3]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i8** getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @g, i32 0, i32 0, i32 0), i8*** [[A]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: (i8*** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: @@ -1044,12 +1045,12 @@ entry: define i1 @test_cmp_null_after_cast() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_cmp_null_after_cast ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_cmp_null_after_cast ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i1 @cmp_null_after_cast() #[[ATTR12]] @@ -1059,7 +1060,7 @@ define i1 @test_cmp_null_after_cast() { ret i1 %c } define internal i1 @cmp_null_after_cast(i32 %a, i8 %b) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cmp_null_after_cast ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i1 true @@ -1145,7 +1146,7 @@ join: } define i1 @test_liveness(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_liveness ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -1157,7 +1158,7 @@ define i1 @test_liveness(i1 %c) { ; TUNIT-NEXT: 
[[RC1:%.*]] = call i1 @ret(i1 noundef [[P]]) #[[ATTR9]] ; TUNIT-NEXT: ret i1 [[RC1]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_liveness ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -1180,7 +1181,7 @@ f: } define internal i1 @ret(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ret ; CHECK-SAME: (i1 noundef [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -1228,7 +1229,7 @@ define internal i8 @memcpy_uses_store(i8 %arg) { ; TUNIT-NEXT: [[SRC:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: [[DST:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: store i8 [[ARG]], i8* [[SRC]], align 1 -; TUNIT-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR10]] +; TUNIT-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[DST]], align 1 ; TUNIT-NEXT: ret i8 [[L]] ; @@ -1238,7 +1239,7 @@ define internal i8 @memcpy_uses_store(i8 %arg) { ; CGSCC-NEXT: [[SRC:%.*]] = alloca i8, align 1 ; CGSCC-NEXT: [[DST:%.*]] = alloca i8, align 1 ; CGSCC-NEXT: store i8 [[ARG]], i8* [[SRC]], align 1 -; CGSCC-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR13]] +; CGSCC-NEXT: 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR14:[0-9]+]] ; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[DST]], align 1 ; CGSCC-NEXT: ret i8 [[L]] ; @@ -1254,13 +1255,13 @@ define i8 @memcpy_uses_store_caller(i8 %arg) { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@memcpy_uses_store_caller ; TUNIT-SAME: (i8 [[ARG:%.*]]) #[[ATTR2]] { -; TUNIT-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR9]] ; TUNIT-NEXT: ret i8 [[R]] ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@memcpy_uses_store_caller ; CGSCC-SAME: (i8 [[ARG:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR15:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR13]] ; CGSCC-NEXT: ret i8 [[R]] ; %r = call i8 @memcpy_uses_store(i8 %arg) @@ -1271,7 +1272,7 @@ define i8 @memcpy_uses_store_caller(i8 %arg) { declare i32 @speculatable() speculatable readnone define i32 @test_speculatable_expr() norecurse { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_speculatable_expr ; TUNIT-SAME: () #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i32, align 4 @@ -1279,10 +1280,10 @@ define i32 @test_speculatable_expr() norecurse { ; TUNIT-NEXT: [[PLUS1:%.*]] = add i32 [[SPEC_RESULT]], 1 ; TUNIT-NEXT: store i32 [[PLUS1]], i32* [[STACK]], align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[STACK]], align 4 -; TUNIT-NEXT: [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32 [[TMP1]]) #[[ATTR13:[0-9]+]] +; TUNIT-NEXT: [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32 
[[TMP1]]) #[[ATTR12:[0-9]+]] ; TUNIT-NEXT: ret i32 [[RSPEC]] ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_speculatable_expr ; CGSCC-SAME: () #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i32, align 4 @@ -1301,7 +1302,7 @@ define i32 @test_speculatable_expr() norecurse { } define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@ret_speculatable_expr ; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: [[MEM_PRIV:%.*]] = alloca i32, align 4 @@ -1311,7 +1312,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) { ; TUNIT-NEXT: [[ADD:%.*]] = add i32 [[MUL]], 7 ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@ret_speculatable_expr ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[MEM_PRIV:%.*]] = alloca i32, align 4 @@ -1330,34 +1331,32 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) { ;. 
; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { readnone speculatable } -; TUNIT: attributes #[[ATTR6]] = { norecurse nosync readnone } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR8:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { speculatable memory(none) } +; TUNIT: attributes #[[ATTR6]] = { norecurse nosync memory(none) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR10]] = { willreturn } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR13]] = { nosync nounwind readonly } +; TUNIT: attributes #[[ATTR11]] = { willreturn memory(readwrite) } +; TUNIT: attributes #[[ATTR12]] = { nosync nounwind } ;. 
; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn memory(write) } ; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR8:[0-9]+]] = { readnone speculatable } -; CGSCC: attributes #[[ATTR9]] = { norecurse nosync readnone } -; CGSCC: attributes #[[ATTR10]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR11:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; CGSCC: attributes #[[ATTR12]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR13]] = { willreturn } -; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR15]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR8:[0-9]+]] = { speculatable memory(none) } +; CGSCC: attributes #[[ATTR9]] = { norecurse nosync memory(none) } +; CGSCC: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes 
#[[ATTR11:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR12]] = { willreturn } +; CGSCC: attributes #[[ATTR13]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR14]] = { willreturn memory(readwrite) } ;. diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll index c448587423b19..76675f9c8ab68 100644 --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i54:64-f80:128-n8:16:32:64-S128" ; TEST 1 (positive case) define void @only_return() #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@only_return ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret void @@ -28,7 +28,7 @@ define void @only_return() #0 { ; FIXME: missing willreturn define i32 @fib(i32 %0) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@fib ; TUNIT-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 2 @@ -43,16 +43,16 @@ define i32 @fib(i32 %0) local_unnamed_addr #0 { ; TUNIT: 9: ; TUNIT-NEXT: ret i32 [[TMP0]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@fib ; CGSCC-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 2 ; CGSCC-NEXT: br i1 [[TMP2]], label [[TMP9:%.*]], label [[TMP3:%.*]] ; CGSCC: 3: ; CGSCC-NEXT: [[TMP4:%.*]] = add nsw i32 [[TMP0]], -1 -; CGSCC-NEXT: [[TMP5:%.*]] = tail 
call i32 @fib(i32 [[TMP4]]) #[[ATTR19:[0-9]+]] +; CGSCC-NEXT: [[TMP5:%.*]] = tail call i32 @fib(i32 [[TMP4]]) #[[ATTR27:[0-9]+]] ; CGSCC-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], -2 -; CGSCC-NEXT: [[TMP7:%.*]] = tail call i32 @fib(i32 [[TMP6]]) #[[ATTR19]] +; CGSCC-NEXT: [[TMP7:%.*]] = tail call i32 @fib(i32 [[TMP6]]) #[[ATTR27]] ; CGSCC-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP7]], [[TMP5]] ; CGSCC-NEXT: ret i32 [[TMP8]] ; CGSCC: 9: @@ -84,7 +84,7 @@ define i32 @fib(i32 %0) local_unnamed_addr #0 { ; fact_maybe_not(-1) doesn't stop. define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@fact_maybe_not_halt ; CHECK-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -132,7 +132,7 @@ define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr #0 { ; } define i32 @fact_loop(i32 %0) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@fact_loop ; CHECK-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -176,16 +176,27 @@ define i32 @fact_loop(i32 %0) local_unnamed_addr #0 { declare void @sink() nounwind willreturn nosync nofree define void @mutual_recursion1(i1 %c) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind uwtable -; CHECK-LABEL: define {{[^@]+}}@mutual_recursion1 -; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { -; CHECK-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] -; CHECK: rec: -; CHECK-NEXT: call void @sink() #[[ATTR12:[0-9]+]] -; CHECK-NEXT: call void @mutual_recursion2(i1 noundef [[C]]) 
#[[ATTR27:[0-9]+]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree noinline nosync nounwind uwtable +; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion1 +; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { +; TUNIT-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] +; TUNIT: rec: +; TUNIT-NEXT: call void @sink() #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: call void @mutual_recursion2(i1 noundef [[C]]) #[[ATTR26]] +; TUNIT-NEXT: br label [[END]] +; TUNIT: end: +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree noinline nosync nounwind uwtable +; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion1 +; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { +; CGSCC-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] +; CGSCC: rec: +; CGSCC-NEXT: call void @sink() #[[ATTR12:[0-9]+]] +; CGSCC-NEXT: call void @mutual_recursion2(i1 noundef [[C]]) #[[ATTR27]] +; CGSCC-NEXT: br label [[END]] +; CGSCC: end: +; CGSCC-NEXT: ret void ; br i1 %c, label %rec, label %end rec: @@ -198,11 +209,17 @@ end: define void @mutual_recursion2(i1 %c) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind uwtable -; CHECK-LABEL: define {{[^@]+}}@mutual_recursion2 -; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: call void @mutual_recursion1(i1 [[C]]) #[[ATTR27]] -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree noinline nosync nounwind uwtable +; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion2 +; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { +; TUNIT-NEXT: call void @mutual_recursion1(i1 [[C]]) #[[ATTR26]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree noinline nosync nounwind uwtable +; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion2 +; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { +; CGSCC-NEXT: call void @mutual_recursion1(i1 [[C]]) #[[ATTR27]] +; CGSCC-NEXT: ret void ; call void @mutual_recursion1(i1 %c) ret void @@ -277,12 +294,12 @@ define void @conditional_exit(i32 %0, i32* nocapture readonly %1) local_unnamed_ ; 
TEST 6 (positive case) ; Call intrinsic function -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; CHECK-NEXT: declare float @llvm.floor.f32(float) declare float @llvm.floor.f32(float) define void @call_floor(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@call_floor ; CHECK-SAME: (float [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret void @@ -292,11 +309,17 @@ define void @call_floor(float %a) #0 { } define float @call_floor2(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable -; CHECK-LABEL: define {{[^@]+}}@call_floor2 -; CHECK-SAME: (float [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR28:[0-9]+]] -; CHECK-NEXT: ret float [[C]] +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable +; TUNIT-LABEL: define {{[^@]+}}@call_floor2 +; TUNIT-SAME: (float [[A:%.*]]) #[[ATTR0]] { +; TUNIT-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR27:[0-9]+]] +; TUNIT-NEXT: ret float [[C]] +; +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable +; CGSCC-LABEL: define {{[^@]+}}@call_floor2 +; CGSCC-SAME: (float [[A:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR28:[0-9]+]] +; CGSCC-NEXT: ret float [[C]] ; %c = tail call float @llvm.floor.f32(float %a) ret float %c @@ -312,11 +335,17 @@ define float @call_floor2(float %a) #0 { declare void @maybe_noreturn() #0 define void @call_maybe_noreturn() #0 { -; CHECK: Function Attrs: noinline nounwind uwtable -; CHECK-LABEL: define 
{{[^@]+}}@call_maybe_noreturn -; CHECK-SAME: () #[[ATTR7]] { -; CHECK-NEXT: tail call void @maybe_noreturn() #[[ATTR29:[0-9]+]] -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: noinline nounwind uwtable +; TUNIT-LABEL: define {{[^@]+}}@call_maybe_noreturn +; TUNIT-SAME: () #[[ATTR7]] { +; TUNIT-NEXT: tail call void @maybe_noreturn() #[[ATTR28:[0-9]+]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: noinline nounwind uwtable +; CGSCC-LABEL: define {{[^@]+}}@call_maybe_noreturn +; CGSCC-SAME: () #[[ATTR7]] { +; CGSCC-NEXT: tail call void @maybe_noreturn() #[[ATTR29:[0-9]+]] +; CGSCC-NEXT: ret void ; tail call void @maybe_noreturn() ret void @@ -331,11 +360,17 @@ define void @call_maybe_noreturn() #0 { declare void @will_return() willreturn norecurse define void @f1() #0 { -; CHECK: Function Attrs: noinline nounwind willreturn uwtable -; CHECK-LABEL: define {{[^@]+}}@f1 -; CHECK-SAME: () #[[ATTR10:[0-9]+]] { -; CHECK-NEXT: tail call void @will_return() #[[ATTR30:[0-9]+]] -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: noinline nounwind willreturn uwtable +; TUNIT-LABEL: define {{[^@]+}}@f1 +; TUNIT-SAME: () #[[ATTR10:[0-9]+]] { +; TUNIT-NEXT: tail call void @will_return() #[[ATTR27]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: noinline nounwind willreturn uwtable +; CGSCC-LABEL: define {{[^@]+}}@f1 +; CGSCC-SAME: () #[[ATTR10:[0-9]+]] { +; CGSCC-NEXT: tail call void @will_return() #[[ATTR28]] +; CGSCC-NEXT: ret void ; tail call void @will_return() ret void @@ -344,8 +379,8 @@ define void @f1() #0 { define void @f2() #0 { ; CHECK: Function Attrs: noinline nounwind willreturn uwtable ; CHECK-LABEL: define {{[^@]+}}@f2 -; CHECK-SAME: () #[[ATTR10]] { -; CHECK-NEXT: tail call void @f1() #[[ATTR12]] +; CHECK-SAME: () #[[ATTR10:[0-9]+]] { +; CHECK-NEXT: tail call void @f1() #[[ATTR12:[0-9]+]] ; CHECK-NEXT: ret void ; tail call void @f1() @@ -384,17 +419,29 @@ label2: declare i1 @maybe_raise_exception() #1 willreturn define void @invoke_test() personality 
i32 (...)* @__gxx_personality_v0 { -; CHECK: Function Attrs: nounwind willreturn -; CHECK-LABEL: define {{[^@]+}}@invoke_test -; CHECK-SAME: () #[[ATTR12]] personality i32 (...)* @__gxx_personality_v0 { -; CHECK-NEXT: [[TMP1:%.*]] = invoke i1 @maybe_raise_exception() #[[ATTR30]] -; CHECK-NEXT: to label [[N:%.*]] unwind label [[F:%.*]] -; CHECK: N: -; CHECK-NEXT: ret void -; CHECK: F: -; CHECK-NEXT: [[VAL:%.*]] = landingpad { i8*, i32 } -; CHECK-NEXT: catch i8* null -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@invoke_test +; TUNIT-SAME: () #[[ATTR12]] personality i32 (...)* @__gxx_personality_v0 { +; TUNIT-NEXT: [[TMP1:%.*]] = invoke i1 @maybe_raise_exception() #[[ATTR27]] +; TUNIT-NEXT: to label [[N:%.*]] unwind label [[F:%.*]] +; TUNIT: N: +; TUNIT-NEXT: ret void +; TUNIT: F: +; TUNIT-NEXT: [[VAL:%.*]] = landingpad { i8*, i32 } +; TUNIT-NEXT: catch i8* null +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@invoke_test +; CGSCC-SAME: () #[[ATTR12]] personality i32 (...)* @__gxx_personality_v0 { +; CGSCC-NEXT: [[TMP1:%.*]] = invoke i1 @maybe_raise_exception() #[[ATTR28]] +; CGSCC-NEXT: to label [[N:%.*]] unwind label [[F:%.*]] +; CGSCC: N: +; CGSCC-NEXT: ret void +; CGSCC: F: +; CGSCC-NEXT: [[VAL:%.*]] = landingpad { i8*, i32 } +; CGSCC-NEXT: catch i8* null +; CGSCC-NEXT: ret void ; invoke i1 @maybe_raise_exception() to label %N unwind label %F @@ -420,7 +467,7 @@ declare i32 @__gxx_personality_v0(...) 
; } define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: define {{[^@]+}}@loop_constant_trip_count ; CHECK-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[TMP0:%.*]]) #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: br label [[TMP3:%.*]] @@ -464,7 +511,7 @@ define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 { ; return ans; ; } define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, i32 %3) local_unnamed_addr #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind memory(argmem: read) uwtable ; CHECK-LABEL: define {{[^@]+}}@loop_trip_count_unbound ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32* nocapture nofree readonly [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR14:[0-9]+]] { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] @@ -515,7 +562,7 @@ define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: define {{[^@]+}}@loop_trip_dec ; CHECK-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR13]] { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], -1 @@ -562,7 +609,7 @@ define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr ; multiple return define i32 @multiple_return(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse 
nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@multiple_return ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[A]], 0 @@ -586,13 +633,21 @@ f: ; 15.1 (positive case) define void @unreachable_exit_positive1() #0 { -; CHECK: Function Attrs: noinline nounwind willreturn uwtable -; CHECK-LABEL: define {{[^@]+}}@unreachable_exit_positive1 -; CHECK-SAME: () #[[ATTR10]] { -; CHECK-NEXT: tail call void @will_return() #[[ATTR30]] -; CHECK-NEXT: ret void -; CHECK: unreachable_label: -; CHECK-NEXT: unreachable +; TUNIT: Function Attrs: noinline nounwind willreturn uwtable +; TUNIT-LABEL: define {{[^@]+}}@unreachable_exit_positive1 +; TUNIT-SAME: () #[[ATTR10]] { +; TUNIT-NEXT: tail call void @will_return() #[[ATTR27]] +; TUNIT-NEXT: ret void +; TUNIT: unreachable_label: +; TUNIT-NEXT: unreachable +; +; CGSCC: Function Attrs: noinline nounwind willreturn uwtable +; CGSCC-LABEL: define {{[^@]+}}@unreachable_exit_positive1 +; CGSCC-SAME: () #[[ATTR10]] { +; CGSCC-NEXT: tail call void @will_return() #[[ATTR28]] +; CGSCC-NEXT: ret void +; CGSCC: unreachable_label: +; CGSCC-NEXT: unreachable ; tail call void @will_return() ret void @@ -603,7 +658,7 @@ unreachable_label: } define i32 @unreachable_exit_positive2(i32) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@unreachable_exit_positive2 ; CHECK-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -662,7 +717,7 @@ unreachable_label: } define void @unreachable_exit_negative2() #0 { -; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree 
noinline norecurse noreturn nosync nounwind memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@unreachable_exit_negative2 ; CHECK-SAME: () #[[ATTR15:[0-9]+]] { ; CHECK-NEXT: br label [[L1:%.*]] @@ -711,7 +766,7 @@ define void @call_longjmp(i8* nocapture readnone %0) local_unnamed_addr #0 { ; } define i32 @infinite_loop_inside_bounded_loop(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@infinite_loop_inside_bounded_loop ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR17:[0-9]+]] { ; CHECK-NEXT: entry: @@ -772,7 +827,7 @@ for.end: ; preds = %for.cond.cleanup ; } define i32 @bounded_nested_loops(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bounded_nested_loops ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR18:[0-9]+]] { ; CHECK-NEXT: entry: @@ -849,7 +904,7 @@ for.end: ; preds = %for.cond.cleanup ; } define i32 @bounded_loop_inside_unbounded_loop(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@bounded_loop_inside_unbounded_loop ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR17]] { ; CHECK-NEXT: entry: @@ -933,7 +988,7 @@ while.end: ; preds = %while.cond ; } define i32 @nested_unbounded_loops(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@nested_unbounded_loops ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR17]] { ; CHECK-NEXT: entry: @@ -1023,7 +1078,7 @@ while.end11: ; preds = %while.cond ; } define void @non_loop_cycle(i32 %n) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
memory(none) ; TUNIT-LABEL: define {{[^@]+}}@non_loop_cycle ; TUNIT-SAME: (i32 [[N:%.*]]) #[[ATTR17]] { ; TUNIT-NEXT: entry: @@ -1053,9 +1108,9 @@ define void @non_loop_cycle(i32 %n) { ; TUNIT: exit: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@non_loop_cycle -; CGSCC-SAME: (i32 [[N:%.*]]) #[[ATTR19]] { +; CGSCC-SAME: (i32 [[N:%.*]]) #[[ATTR19:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @fact_loop(i32 [[N]]) ; CGSCC-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 5 @@ -1143,29 +1198,29 @@ define void @willreturn_mustprogress_caller_1() mustprogress { ret void } define void @willreturn_mustprogress_caller_2() mustprogress { -; TUNIT: Function Attrs: mustprogress readonly willreturn +; TUNIT: Function Attrs: mustprogress willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_2 ; TUNIT-SAME: () #[[ATTR23:[0-9]+]] { -; TUNIT-NEXT: call void @readonly() #[[ATTR19:[0-9]+]] +; TUNIT-NEXT: call void @readonly() ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: mustprogress readonly willreturn +; CGSCC: Function Attrs: mustprogress willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_2 ; CGSCC-SAME: () #[[ATTR24:[0-9]+]] { -; CGSCC-NEXT: call void @readonly() #[[ATTR20:[0-9]+]] +; CGSCC-NEXT: call void @readonly() ; CGSCC-NEXT: ret void ; call void @readonly() ret void } define void @willreturn_mustprogress_caller_3() mustprogress { -; TUNIT: Function Attrs: mustprogress nosync readnone willreturn +; TUNIT: Function Attrs: mustprogress nosync willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_3 ; TUNIT-SAME: () #[[ATTR24:[0-9]+]] { ; TUNIT-NEXT: call void @readnone() ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: mustprogress nosync readnone willreturn +; CGSCC: Function Attrs: mustprogress nosync willreturn 
memory(none) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_3 ; CGSCC-SAME: () #[[ATTR25:[0-9]+]] { ; CGSCC-NEXT: call void @readnone() @@ -1183,16 +1238,16 @@ define void @willreturn_mustprogress_callee_1() { ret void } define void @willreturn_mustprogress_callee_2() { -; TUNIT: Function Attrs: readonly willreturn +; TUNIT: Function Attrs: willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_2 ; TUNIT-SAME: () #[[ATTR25:[0-9]+]] { -; TUNIT-NEXT: call void @readonly_mustprogress() #[[ATTR25]] +; TUNIT-NEXT: call void @readonly_mustprogress() #[[ATTR27]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: readonly willreturn +; CGSCC: Function Attrs: willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_2 ; CGSCC-SAME: () #[[ATTR26:[0-9]+]] { -; CGSCC-NEXT: call void @readonly_mustprogress() #[[ATTR26]] +; CGSCC-NEXT: call void @readonly_mustprogress() #[[ATTR28]] ; CGSCC-NEXT: ret void ; call void @readonly_mustprogress() @@ -1207,16 +1262,16 @@ define void @willreturn_mustprogress_callee_3() { ret void } define void @willreturn_mustprogress_callee_4() { -; TUNIT: Function Attrs: readonly willreturn +; TUNIT: Function Attrs: willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_4 ; TUNIT-SAME: () #[[ATTR25]] { -; TUNIT-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR25]] +; TUNIT-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR27]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: readonly willreturn +; CGSCC: Function Attrs: willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_4 ; CGSCC-SAME: () #[[ATTR26]] { -; CGSCC-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR26]] +; CGSCC-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR28]] ; CGSCC-NEXT: ret void ; call void @willreturn_mustprogress_callee_2() @@ -1226,67 +1281,64 @@ define void @willreturn_mustprogress_callee_4() { 
attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind readnone uwtable } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind memory(none) uwtable } ; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind uwtable } ; TUNIT: attributes #[[ATTR5]] = { noreturn } ; TUNIT: attributes #[[ATTR6]] = { noinline noreturn nounwind uwtable } ; TUNIT: attributes #[[ATTR7]] = { noinline nounwind uwtable } -; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; TUNIT: attributes #[[ATTR9:[0-9]+]] = { norecurse willreturn } ; TUNIT: attributes #[[ATTR10]] = { noinline nounwind willreturn uwtable } ; TUNIT: attributes #[[ATTR11:[0-9]+]] = { noinline willreturn uwtable } ; TUNIT: attributes #[[ATTR12]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR13]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; TUNIT: attributes #[[ATTR14]] = { argmemonly nofree noinline norecurse nosync nounwind readonly uwtable } -; TUNIT: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } +; TUNIT: attributes #[[ATTR13]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; TUNIT: attributes 
#[[ATTR14]] = { nofree noinline norecurse nosync nounwind memory(argmem: read) uwtable } +; TUNIT: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } ; TUNIT: attributes #[[ATTR16:[0-9]+]] = { noreturn nounwind } -; TUNIT: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR19]] = { readonly } -; TUNIT: attributes #[[ATTR20:[0-9]+]] = { readnone } +; TUNIT: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR19:[0-9]+]] = { memory(read) } +; TUNIT: attributes #[[ATTR20:[0-9]+]] = { memory(none) } ; TUNIT: attributes #[[ATTR21]] = { mustprogress } -; TUNIT: attributes #[[ATTR22:[0-9]+]] = { mustprogress readonly } -; TUNIT: attributes #[[ATTR23]] = { mustprogress readonly willreturn } -; TUNIT: attributes #[[ATTR24]] = { mustprogress nosync readnone willreturn } -; TUNIT: attributes #[[ATTR25]] = { readonly willreturn } -; TUNIT: attributes #[[ATTR26]] = { nofree nosync nounwind readnone } -; TUNIT: attributes #[[ATTR27]] = { nofree nosync nounwind } -; TUNIT: attributes #[[ATTR28]] = { readnone willreturn } -; TUNIT: attributes #[[ATTR29]] = { nounwind } -; TUNIT: attributes #[[ATTR30]] = { willreturn } +; TUNIT: attributes #[[ATTR22:[0-9]+]] = { mustprogress memory(read) } +; TUNIT: attributes #[[ATTR23]] = { mustprogress willreturn memory(read) } +; TUNIT: attributes #[[ATTR24]] = { mustprogress nosync willreturn memory(none) } +; TUNIT: attributes #[[ATTR25]] = { willreturn memory(read) } +; TUNIT: attributes #[[ATTR26]] = { nofree nosync nounwind } +; TUNIT: attributes #[[ATTR27]] = { willreturn } +; TUNIT: attributes #[[ATTR28]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind readnone uwtable } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind memory(none) uwtable } ; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR4]] = { nofree noinline nosync nounwind uwtable } ; CGSCC: attributes #[[ATTR5]] = { noreturn } ; CGSCC: attributes #[[ATTR6]] = { noinline noreturn nounwind uwtable } ; CGSCC: attributes #[[ATTR7]] = { noinline nounwind uwtable } -; CGSCC: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CGSCC: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CGSCC: attributes #[[ATTR9:[0-9]+]] = { norecurse willreturn } ; CGSCC: attributes #[[ATTR10]] = { noinline nounwind willreturn uwtable } ; CGSCC: attributes #[[ATTR11:[0-9]+]] = { noinline willreturn uwtable } ; CGSCC: attributes #[[ATTR12]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR13]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; CGSCC: attributes #[[ATTR14]] = { argmemonly nofree noinline norecurse nosync nounwind readonly uwtable } -; CGSCC: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } +; CGSCC: attributes #[[ATTR13]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR14]] = { nofree noinline norecurse nosync nounwind memory(argmem: read) uwtable } 
+; CGSCC: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } ; CGSCC: attributes #[[ATTR16:[0-9]+]] = { noreturn nounwind } -; CGSCC: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind readnone } -; CGSCC: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR19]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR20]] = { readonly } -; CGSCC: attributes #[[ATTR21:[0-9]+]] = { readnone } +; CGSCC: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR19]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR20:[0-9]+]] = { memory(read) } +; CGSCC: attributes #[[ATTR21:[0-9]+]] = { memory(none) } ; CGSCC: attributes #[[ATTR22]] = { mustprogress } -; CGSCC: attributes #[[ATTR23:[0-9]+]] = { mustprogress readonly } -; CGSCC: attributes #[[ATTR24]] = { mustprogress readonly willreturn } -; CGSCC: attributes #[[ATTR25]] = { mustprogress nosync readnone willreturn } -; CGSCC: attributes #[[ATTR26]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR23:[0-9]+]] = { mustprogress memory(read) } +; CGSCC: attributes #[[ATTR24]] = { mustprogress willreturn memory(read) } +; CGSCC: attributes #[[ATTR25]] = { mustprogress nosync willreturn memory(none) } +; CGSCC: attributes #[[ATTR26]] = { willreturn memory(read) } ; CGSCC: attributes #[[ATTR27]] = { nofree nosync nounwind } -; CGSCC: attributes #[[ATTR28]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR28]] = { willreturn } ; CGSCC: attributes #[[ATTR29]] = { nounwind } -; CGSCC: attributes #[[ATTR30]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/wrapper.ll b/llvm/test/Transforms/Attributor/wrapper.ll index 85bf78a69d2ea..34af977467961 100644 --- a/llvm/test/Transforms/Attributor/wrapper.ll +++ b/llvm/test/Transforms/Attributor/wrapper.ll @@ -8,7 +8,7 @@ ; CHECK: ret ; ; Check the original function, which is wrapped and becomes anonymous -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK: define internal noundef i32 @0() ; CHECK: ret i32 1 define linkonce i32 @inner1() { @@ -35,7 +35,7 @@ entry: ; CHECK: tail call i32 @1(i32 %a, i32 %b) ; CHECK: ret ; -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK: define internal i32 @1(i32 %a, i32 %b) ; CHECK: %c = add i32 %a, %b ; CHECK: ret i32 %c diff --git a/llvm/test/Transforms/Coroutines/coro-readnone-02.ll b/llvm/test/Transforms/Coroutines/coro-readnone-02.ll index eede209fbdd0f..c96377fd1c6d8 100644 --- a/llvm/test/Transforms/Coroutines/coro-readnone-02.ll +++ b/llvm/test/Transforms/Coroutines/coro-readnone-02.ll @@ -50,7 +50,7 @@ suspend: ; CHECK_SPLITTED-NEXT: call void @nop() ; CHECK_SPLITTED-NEXT: call void @print_same() ; -; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone } +; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { memory(none) } ; ; CHECK_UNSPLITTED-LABEL: @f( ; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff diff --git a/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll index 047545a8115a1..9ff103cdda8f9 100644 --- a/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll +++ b/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll @@ -45,7 +45,7 @@ bb2: ; preds = %bb1, %bb declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { nounwind ssp } -; 
CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #2 = { noinline nounwind ssp } ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll index 13d7682fcf877..23d6c5ae1fdb6 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=function-attrs -S | FileCheck %s define i32 @a() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@a ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP:%.*]] = call i32 @b() @@ -13,7 +13,7 @@ define i32 @a() { } define i32 @b() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@b ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = call i32 @a() diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll index 75fb113f11436..ee8437e8c0f1a 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll @@ -6,7 +6,7 @@ declare i32 @e() readnone define i32 @f() { -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: @f( ; CHECK-NEXT: [[TMP:%.*]] = call i32 @e() ; CHECK-NEXT: ret i32 [[TMP]] @@ -16,7 +16,7 @@ define i32 @f() { } define i32 @g() readonly { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; 
CHECK-LABEL: @g( ; CHECK-NEXT: ret i32 0 ; @@ -24,7 +24,7 @@ define i32 @g() readonly { } define i32 @h() readnone { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @h( ; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr @x, align 4 ; CHECK-NEXT: ret i32 [[TMP]] diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll index e37667d94fb31..9e1b9d7e2763f 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=function-attrs -S | FileCheck %s define i32 @f() { -; CHECK: Function Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = call i32 @e() diff --git a/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll b/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll index 178b075f1ec78..7a97498b8f328 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll @@ -4,7 +4,7 @@ @s = external constant i8 define i8 @f() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @f( ; CHECK-NEXT: [[TMP:%.*]] = load i8, ptr @s, align 1 ; CHECK-NEXT: ret i8 [[TMP]] diff --git a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll index 5ea9582dfe10e..fb73dd770fa9d 100644 --- a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll @@ -4,7 +4,7 @@ @g = global i32 20 define void @test_no_read_or_write() { -; CHECK: Function Attrs: mustprogress 
nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test_no_read_or_write( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void @@ -14,7 +14,7 @@ entry: } define i32 @test_only_read_arg(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_only_read_arg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR:%.*]], align 4 @@ -26,7 +26,7 @@ entry: } define i32 @test_only_read_arg_already_has_argmemonly(ptr %ptr) argmemonly { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_only_read_arg_already_has_argmemonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR:%.*]], align 4 @@ -38,7 +38,7 @@ entry: } define i32 @test_read_global() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: @test_read_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load i32, ptr @g, align 4 @@ -50,7 +50,7 @@ entry: } define i32 @test_read_loaded_ptr(ptr %ptr) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read, inaccessiblemem: none) ; CHECK-LABEL: @test_read_loaded_ptr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8 @@ -64,7 +64,7 @@ entry: } define void @test_only_write_arg(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress 
nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: @test_only_write_arg( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 @@ -76,7 +76,7 @@ entry: } define void @test_write_global() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: @test_write_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, ptr @g, align 4 @@ -103,7 +103,7 @@ entry: declare i32 @fn_readnone() readnone define void @test_call_readnone(ptr %ptr) { -; CHECK: Function Attrs: argmemonly writeonly +; CHECK: Function Attrs: memory(argmem: write) ; CHECK-LABEL: @test_call_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @fn_readnone() @@ -119,7 +119,7 @@ entry: declare i32 @fn_argmemonly(ptr) argmemonly define i32 @test_call_argmemonly(ptr %ptr) { -; CHECK: Function Attrs: argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite) ; CHECK-LABEL: @test_call_argmemonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @fn_argmemonly(ptr [[PTR:%.*]]) @@ -131,7 +131,7 @@ entry: } define i32 @test_call_fn_where_argmemonly_can_be_inferred(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_call_fn_where_argmemonly_can_be_inferred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @test_only_read_arg(ptr [[PTR:%.*]]) @@ -143,7 +143,7 @@ entry: } define void @test_memcpy_argonly(ptr %dst, ptr %src) { -; CHECK: Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn 
memory(argmem: readwrite) ; CHECK-LABEL: @test_memcpy_argonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 32, i1 false) @@ -159,7 +159,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) @arr = global [32 x i8] zeroinitializer define void @test_memcpy_src_global(ptr %dst) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @test_memcpy_src_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr @arr, i64 32, i1 false) @@ -171,7 +171,7 @@ entry: } define void @test_memcpy_dst_global(ptr %src) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @test_memcpy_dst_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr @arr, ptr [[SRC:%.*]], i64 32, i1 false) @@ -183,7 +183,7 @@ entry: } define i32 @test_read_arg_access_alloca(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_read_arg_access_alloca( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -203,7 +203,7 @@ entry: declare void @fn_inaccessiblememonly() inaccessiblememonly define void @test_inaccessiblememonly() { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: @test_inaccessiblememonly( ; CHECK-NEXT: call void @fn_inaccessiblememonly() ; CHECK-NEXT: ret void @@ -213,9 +213,9 @@ define void @test_inaccessiblememonly() { } define void @test_inaccessiblememonly_readonly() { -; CHECK: Function Attrs: 
inaccessiblememonly nofree readonly +; CHECK: Function Attrs: nofree memory(inaccessiblemem: read) ; CHECK-LABEL: @test_inaccessiblememonly_readonly( -; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR15:[0-9]+]] +; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR16:[0-9]+]] ; CHECK-NEXT: ret void ; call void @fn_inaccessiblememonly() readonly @@ -223,10 +223,10 @@ define void @test_inaccessiblememonly_readonly() { } define void @test_inaccessibleorargmemonly_readonly(ptr %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree readonly +; CHECK: Function Attrs: nofree memory(argmem: read, inaccessiblemem: read) ; CHECK-LABEL: @test_inaccessibleorargmemonly_readonly( ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG:%.*]], align 4 -; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR15]] +; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR16]] ; CHECK-NEXT: ret void ; load i32, ptr %arg @@ -235,10 +235,10 @@ define void @test_inaccessibleorargmemonly_readonly(ptr %arg) { } define void @test_inaccessibleorargmemonly_readwrite(ptr %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: write, inaccessiblemem: read) ; CHECK-LABEL: @test_inaccessibleorargmemonly_readwrite( ; CHECK-NEXT: store i32 0, ptr [[ARG:%.*]], align 4 -; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR15]] +; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR16]] ; CHECK-NEXT: ret void ; store i32 0, ptr %arg diff --git a/llvm/test/Transforms/FunctionAttrs/atomic.ll b/llvm/test/Transforms/FunctionAttrs/atomic.ll index 33711acd7d540..8635f2bbdc498 100644 --- a/llvm/test/Transforms/FunctionAttrs/atomic.ll +++ b/llvm/test/Transforms/FunctionAttrs/atomic.ll @@ -4,7 +4,7 @@ ; Atomic load/store to local doesn't affect whether a function is ; readnone/readonly. 
define i32 @test1(i32 %x) uwtable ssp { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone ssp willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind ssp willreturn memory(none) uwtable ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 @@ -21,7 +21,7 @@ entry: ; A function with an Acquire load is not readonly. define i32 @test2(ptr %x) uwtable ssp { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind ssp willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind ssp willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[X:%.*]] seq_cst, align 4 diff --git a/llvm/test/Transforms/FunctionAttrs/convergent.ll b/llvm/test/Transforms/FunctionAttrs/convergent.ll index 34598628688fc..0263e0ec22551 100644 --- a/llvm/test/Transforms/FunctionAttrs/convergent.ll +++ b/llvm/test/Transforms/FunctionAttrs/convergent.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=function-attrs -S < %s | FileCheck %s define i32 @nonleaf() convergent { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nonleaf ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @leaf() @@ -13,7 +13,7 @@ define i32 @nonleaf() convergent { } define i32 @leaf() convergent { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@leaf ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32 0 @@ -85,7 +85,7 @@ define i32 @intrinsic() convergent { } define i32 @recursive1() convergent { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; 
CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@recursive1 ; CHECK-SAME: () #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @recursive2() #[[ATTR1]] @@ -96,7 +96,7 @@ define i32 @recursive1() convergent { } define i32 @recursive2() convergent { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@recursive2 ; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @recursive1() #[[ATTR1]] diff --git a/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll b/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll index b2b46f6f4974c..7e246c482431e 100644 --- a/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll @@ -5,7 +5,7 @@ ; function attributes when we derive readnone. define ptr @given_argmem_infer_readnone(ptr %p) #0 { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @given_argmem_infer_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret ptr [[P:%.*]] @@ -15,7 +15,7 @@ entry: } define ptr @given_inaccessible_infer_readnone(ptr %p) #1 { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @given_inaccessible_infer_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret ptr [[P:%.*]] @@ -25,7 +25,7 @@ entry: } define ptr @given_inaccessible_or_argmem_infer_readnone(ptr %p) #2 { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @given_inaccessible_or_argmem_infer_readnone( ; CHECK-NEXT: 
entry: ; CHECK-NEXT: ret ptr [[P:%.*]] diff --git a/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll b/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll index 98a36ac17e19a..9ba82e2dc1cce 100644 --- a/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll +++ b/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll @@ -7,7 +7,7 @@ declare void @llvm.sideeffect() ; is present. define void @test() { -; CHECK: Function Attrs: inaccessiblememonly mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: @test( ; CHECK-NEXT: call void @llvm.sideeffect() ; CHECK-NEXT: ret void @@ -17,7 +17,7 @@ define void @test() { } define void @loop() { -; CHECK: Function Attrs: inaccessiblememonly nofree noreturn nosync nounwind +; CHECK: Function Attrs: nofree noreturn nosync nounwind memory(inaccessiblemem: readwrite) ; CHECK-LABEL: @loop( ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll index 81064c2f34889..0fe0eadf5f669 100644 --- a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll +++ b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll @@ -14,7 +14,7 @@ declare void @_ZdaPv(ptr) local_unnamed_addr #2 ; TEST 1 (positive case) define void @only_return() #0 { -; FNATTR: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; FNATTR: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@only_return ; FNATTR-SAME: () #[[ATTR3:[0-9]+]] { ; FNATTR-NEXT: ret void @@ -101,7 +101,7 @@ end: define void @mutual_recursion1() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define 
{{[^@]+}}@mutual_recursion1 ; FNATTR-SAME: () #[[ATTR4:[0-9]+]] { ; FNATTR-NEXT: call void @mutual_recursion2() @@ -112,7 +112,7 @@ define void @mutual_recursion1() #0 { } define void @mutual_recursion2() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@mutual_recursion2 ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: call void @mutual_recursion1() @@ -174,7 +174,7 @@ define noalias ptr @call_realloc(ptr nocapture %0, i64 %1) local_unnamed_addr #0 declare void @nofree_function() nofree readnone #0 define void @call_nofree_function() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@call_nofree_function ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: tail call void @nofree_function() @@ -225,7 +225,7 @@ define void @call_both() #0 { declare float @llvm.floor.f32(float) define void @call_floor(float %a) #0 { -; FNATTR: Function Attrs: mustprogress nofree noinline nosync nounwind readnone willreturn uwtable +; FNATTR: Function Attrs: mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@call_floor ; FNATTR-SAME: (float [[A:%.*]]) #[[ATTR7:[0-9]+]] { ; FNATTR-NEXT: [[TMP1:%.*]] = tail call float @llvm.floor.f32(float [[A]]) @@ -239,7 +239,7 @@ define void @call_floor(float %a) #0 { ; Check propagation. 
define void @f1() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@f1 ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: tail call void @nofree_function() @@ -250,7 +250,7 @@ define void @f1() #0 { } define void @f2() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@f2 ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: tail call void @f1() diff --git a/llvm/test/Transforms/FunctionAttrs/nofree.ll b/llvm/test/Transforms/FunctionAttrs/nofree.ll index 020d6d23af0a3..ca56117eeacc8 100644 --- a/llvm/test/Transforms/FunctionAttrs/nofree.ll +++ b/llvm/test/Transforms/FunctionAttrs/nofree.ll @@ -34,7 +34,7 @@ entry: declare void @free(ptr nocapture) local_unnamed_addr #2 define i32 @_Z4foo3Pi(ptr nocapture readonly %a) local_unnamed_addr #3 { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: @_Z4foo3Pi( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4 @@ -81,8 +81,8 @@ define noalias ptr @_Z4foo6Pm(ptr nocapture %a) local_unnamed_addr #1 { ; CHECK: Function Attrs: nounwind uwtable ; CHECK-LABEL: @_Z4foo6Pm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @realloc(ptr [[A]], i64 [[TMP1]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @realloc(ptr [[A]], i64 [[TMP0]]) #[[ATTR2]] ; CHECK-NEXT: ret ptr [[CALL]] ; entry: diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse.ll b/llvm/test/Transforms/FunctionAttrs/norecurse.ll index 
fe262a847537c..4340956312946 100644 --- a/llvm/test/Transforms/FunctionAttrs/norecurse.ll +++ b/llvm/test/Transforms/FunctionAttrs/norecurse.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs),rpo-function-attrs' -S | FileCheck %s define i32 @leaf() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@leaf ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -11,7 +11,7 @@ define i32 @leaf() { } define i32 @self_rec() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@self_rec ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @self_rec() @@ -22,7 +22,7 @@ define i32 @self_rec() { } define i32 @indirect_rec() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@indirect_rec ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @indirect_rec2() @@ -33,7 +33,7 @@ define i32 @indirect_rec() { } define i32 @indirect_rec2() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@indirect_rec2 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @indirect_rec() @@ -44,7 +44,7 @@ define i32 @indirect_rec2() { } define i32 @extern() { -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@extern ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -57,7 +57,7 @@ define i32 @extern() { declare i32 @k() readnone define void @intrinsic(ptr %dest, ptr %src, i32 %len) { -; CHECK: Function Attrs: argmemonly mustprogress 
nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@intrinsic ; CHECK-SAME: (ptr nocapture writeonly [[DEST:%.*]], ptr nocapture readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DEST]], ptr [[SRC]], i32 [[LEN]], i1 false) @@ -70,7 +70,7 @@ define void @intrinsic(ptr %dest, ptr %src, i32 %len) { declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) define internal i32 @called_by_norecurse() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@called_by_norecurse ; CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -81,7 +81,7 @@ define internal i32 @called_by_norecurse() { } define void @m() norecurse { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@m ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() @@ -92,7 +92,7 @@ define void @m() norecurse { } define internal i32 @called_by_norecurse_indirectly() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -103,7 +103,7 @@ define internal i32 @called_by_norecurse_indirectly() { } define internal void @o() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@o ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() @@ -114,7 +114,7 @@ define internal void @o() { } define void @p() norecurse { -; CHECK: Function Attrs: nofree 
norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@p ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: call void @o() @@ -125,7 +125,7 @@ define void @p() norecurse { } define internal i32 @escapes_as_parameter(ptr %p) { -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@escapes_as_parameter ; CHECK-SAME: (ptr nocapture readnone [[P:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -136,7 +136,7 @@ define internal i32 @escapes_as_parameter(ptr %p) { } define internal void @q() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@q ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @escapes_as_parameter(ptr @escapes_as_parameter) @@ -147,7 +147,7 @@ define internal void @q() { } define void @r() norecurse { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@r ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: call void @q() diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll index 77f208e6b0302..1017248e3f64f 100644 --- a/llvm/test/Transforms/FunctionAttrs/nosync.ll +++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Base case, empty function define void @test1() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test1( ; CHECK-NEXT: ret void ; @@ -15,7 +15,7 @@ define void @test1() { ; Show the bottom up walk define void @test2() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone 
willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test2( ; CHECK-NEXT: call void @test1() ; CHECK-NEXT: ret void @@ -38,7 +38,7 @@ define void @test3() convergent { } define i32 @test4(i32 %a, i32 %b) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[A]] @@ -49,7 +49,7 @@ define i32 @test4(i32 %a, i32 %b) { ; negative case - explicit sync define void @test5(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @test5( ; CHECK-NEXT: store atomic i8 0, ptr [[P:%.*]] seq_cst, align 1 ; CHECK-NEXT: ret void @@ -60,7 +60,7 @@ define void @test5(ptr %p) { ; negative case - explicit sync define i8 @test6(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[V:%.*]] = load atomic i8, ptr [[P:%.*]] seq_cst, align 1 ; CHECK-NEXT: ret i8 [[V]] @@ -71,7 +71,7 @@ define i8 @test6(ptr %p) { ; negative case - explicit sync define void @test7(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P:%.*]], i8 0 seq_cst, align 1 ; CHECK-NEXT: ret void @@ -104,7 +104,7 @@ define void @test9(ptr %p) { ; atomic load with monotonic ordering define i32 @load_monotonic(ptr nocapture readonly %0) norecurse nounwind uwtable { -; 
CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @load_monotonic( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0:%.*]] monotonic, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -115,7 +115,7 @@ define i32 @load_monotonic(ptr nocapture readonly %0) norecurse nounwind uwtable ; atomic store with monotonic ordering. define void @store_monotonic(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @store_monotonic( ; CHECK-NEXT: store atomic i32 10, ptr [[TMP0:%.*]] monotonic, align 4 ; CHECK-NEXT: ret void @@ -127,7 +127,7 @@ define void @store_monotonic(ptr nocapture %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; atomic load with acquire ordering. 
define i32 @load_acquire(ptr nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @load_acquire( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0:%.*]] acquire, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -137,7 +137,7 @@ define i32 @load_acquire(ptr nocapture readonly %0) norecurse nounwind uwtable { } define i32 @load_unordered(ptr nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: @load_unordered( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0:%.*]] unordered, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -148,7 +148,7 @@ define i32 @load_unordered(ptr nocapture readonly %0) norecurse nounwind uwtable ; atomic store with unordered ordering. 
define void @store_unordered(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CHECK-LABEL: @store_unordered( ; CHECK-NEXT: store atomic i32 10, ptr [[TMP0:%.*]] unordered, align 4 ; CHECK-NEXT: ret void @@ -161,7 +161,7 @@ define void @store_unordered(ptr nocapture %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; atomic load with release ordering define void @load_release(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind uwtable +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @load_release( ; CHECK-NEXT: store atomic volatile i32 10, ptr [[TMP0:%.*]] release, align 4 ; CHECK-NEXT: ret void @@ -172,7 +172,7 @@ define void @load_release(ptr nocapture %0) norecurse nounwind uwtable { ; negative volatile, relaxed atomic define void @load_volatile_release(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind uwtable +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @load_volatile_release( ; CHECK-NEXT: store atomic volatile i32 10, ptr [[TMP0:%.*]] release, align 4 ; CHECK-NEXT: ret void @@ -183,7 +183,7 @@ define void @load_volatile_release(ptr nocapture %0) norecurse nounwind uwtable ; volatile store. 
define void @volatile_store(ptr %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind uwtable +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @volatile_store( ; CHECK-NEXT: store volatile i32 14, ptr [[TMP0:%.*]], align 4 ; CHECK-NEXT: ret void @@ -195,7 +195,7 @@ define void @volatile_store(ptr %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; volatile load. define i32 @volatile_load(ptr %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @volatile_load( ; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP0:%.*]], align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -237,7 +237,7 @@ declare void @llvm.memset(ptr %dest, i8 %val, i32 %len, i1 %isvolatile) ; negative, checking volatile intrinsics. define i32 @memcpy_volatile(ptr %ptr1, ptr %ptr2) { -; CHECK: Function Attrs: argmemonly mustprogress nofree nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @memcpy_volatile( ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], i32 8, i1 true) ; CHECK-NEXT: ret i32 4 @@ -248,7 +248,7 @@ define i32 @memcpy_volatile(ptr %ptr1, ptr %ptr2) { ; positive, non-volatile intrinsic. 
define i32 @memset_non_volatile(ptr %ptr1, i8 %val) { -; CHECK: Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: @memset_non_volatile( ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr [[PTR1:%.*]], i8 [[VAL:%.*]], i32 8, i1 false) ; CHECK-NEXT: ret i32 4 @@ -271,7 +271,7 @@ declare void @readnone_test() convergent readnone ; negative. Convergent define void @convergent_readnone(){ -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: @convergent_readnone( ; CHECK-NEXT: call void @readnone_test() ; CHECK-NEXT: ret void @@ -299,7 +299,7 @@ define void @i_totally_sync() { declare float @llvm.cos(float %val) readnone define float @cos_test(float %x) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: @cos_test( ; CHECK-NEXT: [[C:%.*]] = call float @llvm.cos.f32(float [[X:%.*]]) ; CHECK-NEXT: ret float [[C]] diff --git a/llvm/test/Transforms/FunctionAttrs/nounwind.ll b/llvm/test/Transforms/FunctionAttrs/nounwind.ll index 7987f7477b900..a147685964dfe 100644 --- a/llvm/test/Transforms/FunctionAttrs/nounwind.ll +++ b/llvm/test/Transforms/FunctionAttrs/nounwind.ll @@ -3,7 +3,7 @@ ; TEST 1 define i32 @foo1() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -13,7 +13,7 @@ define i32 @foo1() { ; TEST 2 define i32 @scc1_foo() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@scc1_foo ; CHECK-SAME: () 
#[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @scc1_bar() @@ -26,7 +26,7 @@ define i32 @scc1_foo() { ; TEST 3 define i32 @scc1_bar() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@scc1_bar ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @scc1_foo() diff --git a/llvm/test/Transforms/FunctionAttrs/optnone.ll b/llvm/test/Transforms/FunctionAttrs/optnone.ll index 260d53b2bfed9..4f097147ff57f 100644 --- a/llvm/test/Transforms/FunctionAttrs/optnone.ll +++ b/llvm/test/Transforms/FunctionAttrs/optnone.ll @@ -20,6 +20,6 @@ declare i8 @strlen(ptr) noinline optnone ; CHECK: (ptr) #1 ; CHECK-LABEL: attributes #0 -; CHECK: = { mustprogress nofree norecurse nosync nounwind readnone willreturn } +; CHECK: = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } ; CHECK-LABEL: attributes #1 ; CHECK: = { noinline optnone } diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll index 1833d8b561ccf..94ffde15d338a 100644 --- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll @@ -18,7 +18,7 @@ define void @test1_2(ptr %x1_2, ptr %y1_2, ptr %z1_2) { } define ptr @test2(ptr %p) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (ptr readnone returned [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: store i32 0, ptr @x, align 4 @@ -29,7 +29,7 @@ define ptr @test2(ptr %p) { } define i1 @test3(ptr %p, ptr %q) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; 
CHECK-LABEL: define {{[^@]+}}@test3 ; CHECK-SAME: (ptr readnone [[P:%.*]], ptr readnone [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = icmp ult ptr [[P]], [[Q]] @@ -42,7 +42,7 @@ define i1 @test3(ptr %p, ptr %q) { declare void @test4_1(ptr nocapture) readonly define void @test4_2(ptr %p) { -; CHECK: Function Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: define {{[^@]+}}@test4_2 ; CHECK-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: call void @test4_1(ptr [[P]]) @@ -54,7 +54,7 @@ define void @test4_2(ptr %p) { ; Missed optz'n: we could make %q readnone, but don't break test6! define void @test5(ptr %p, ptr %q) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test5 ; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: store ptr [[Q]], ptr [[P]], align 8 @@ -81,7 +81,7 @@ define void @test6_2(ptr %p, ptr %q) { ; inalloca parameters are always considered written define void @test7_1(ptr inalloca(i32) %a) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test7_1 ; CHECK-SAME: (ptr nocapture inalloca(i32) [[A:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: ret void @@ -91,7 +91,7 @@ define void @test7_1(ptr inalloca(i32) %a) { ; preallocated parameters are always considered written define void @test7_2(ptr preallocated(i32) %a) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test7_2 ; CHECK-SAME: 
(ptr nocapture preallocated(i32) [[A:%.*]]) #[[ATTR5]] { ; CHECK-NEXT: ret void @@ -100,7 +100,7 @@ define void @test7_2(ptr preallocated(i32) %a) { } define ptr @test8_1(ptr %p) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test8_1 ; CHECK-SAME: (ptr readnone returned [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -111,7 +111,7 @@ entry: } define void @test8_2(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test8_2 ; CHECK-SAME: (ptr writeonly [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -128,7 +128,7 @@ entry: declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>%val, <4 x ptr>, i32, <4 x i1>) define void @test9(<4 x ptr> %ptrs, <4 x i32>%val) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@test9 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL]], <4 x ptr> [[PTRS]], i32 4, <4 x i1> ) @@ -140,7 +140,7 @@ define void @test9(<4 x ptr> %ptrs, <4 x i32>%val) { declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) define <4 x i32> @test10(<4 x ptr> %ptrs) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(read) ; CHECK-LABEL: define {{[^@]+}}@test10 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]]) #[[ATTR9:[0-9]+]] { ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> 
[[PTRS]], i32 4, <4 x i1> , <4 x i32> undef) @@ -152,7 +152,7 @@ define <4 x i32> @test10(<4 x ptr> %ptrs) { declare <4 x i32> @test11_1(<4 x ptr>) argmemonly nounwind readonly define <4 x i32> @test11_2(<4 x ptr> %ptrs) { -; CHECK: Function Attrs: argmemonly nofree nounwind readonly +; CHECK: Function Attrs: nofree nounwind memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test11_2 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]]) #[[ATTR11:[0-9]+]] { ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x ptr> [[PTRS]]) @@ -164,7 +164,7 @@ define <4 x i32> @test11_2(<4 x ptr> %ptrs) { declare <4 x i32> @test12_1(<4 x ptr>) argmemonly nounwind define <4 x i32> @test12_2(<4 x ptr> %ptrs) { -; CHECK: Function Attrs: argmemonly nounwind +; CHECK: Function Attrs: nounwind memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test12_2 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]]) #[[ATTR12:[0-9]+]] { ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x ptr> [[PTRS]]) @@ -175,7 +175,7 @@ define <4 x i32> @test12_2(<4 x ptr> %ptrs) { } define i32 @volatile_load(ptr %p) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@volatile_load ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr [[P]], align 4 @@ -246,7 +246,7 @@ define void @fptr_test1b(ptr %p, ptr %f) { } define void @fptr_test1c(ptr %p, ptr %f) { -; CHECK: Function Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: define {{[^@]+}}@fptr_test1c ; CHECK-SAME: (ptr readnone [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: call void [[F]](ptr readnone [[P]]) #[[ATTR2:[0-9]+]] @@ -278,7 +278,7 @@ define void @fptr_test2b(ptr %p, ptr %f) { } define void @fptr_test2c(ptr %p, ptr %f) { -; CHECK: Function 
Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: define {{[^@]+}}@fptr_test2c ; CHECK-SAME: (ptr readonly [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: call void [[F]](ptr readonly [[P]]) #[[ATTR2]] @@ -289,7 +289,7 @@ define void @fptr_test2c(ptr %p, ptr %f) { } define void @alloca_recphi() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@alloca_recphi ; CHECK-SAME: () #[[ATTR14:[0-9]+]] { ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/FunctionAttrs/stats.ll b/llvm/test/Transforms/FunctionAttrs/stats.ll index 2f36939846d20..5f007b4078ff3 100644 --- a/llvm/test/Transforms/FunctionAttrs/stats.ll +++ b/llvm/test/Transforms/FunctionAttrs/stats.ll @@ -16,13 +16,11 @@ entry: ret void } -; CHECK: 1 function-attrs - Number of functions marked argmemonly +; CHECK: 2 function-attrs - Number of functions with improved memory attribute ; CHECK-NEXT: 1 function-attrs - Number of arguments marked nocapture ; CHECK-NEXT: 1 function-attrs - Number of functions marked as nofree ; CHECK-NEXT: 2 function-attrs - Number of functions marked as norecurse ; CHECK-NEXT: 2 function-attrs - Number of functions marked as nosync ; CHECK-NEXT: 2 function-attrs - Number of functions marked as nounwind -; CHECK-NEXT: 1 function-attrs - Number of functions marked readonly ; CHECK-NEXT: 1 function-attrs - Number of arguments marked readonly ; CHECK-NEXT: 2 function-attrs - Number of functions marked as willreturn -; CHECK-NEXT: 1 function-attrs - Number of functions marked writeonly diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll index 304c415951c64..ecc9a249046ab 100644 --- a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll +++ b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll @@ -38,7 +38,7 @@ define 
void @test_fn_willreturn(ptr %ptr) willreturn { } define void @test_fn_mustprogress_readonly_calls(ptr %ptr) mustprogress { -; CHECK: Function Attrs: mustprogress nofree readonly willreturn +; CHECK: Function Attrs: mustprogress nofree willreturn memory(read) ; CHECK-LABEL: @test_fn_mustprogress_readonly_calls( ; CHECK-NOT: call void @decl_readonly() # ; CHECK-NOT: call void @decl_readnone() # diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn.ll b/llvm/test/Transforms/FunctionAttrs/willreturn.ll index 3413b96556195..1c422ea476d26 100644 --- a/llvm/test/Transforms/FunctionAttrs/willreturn.ll +++ b/llvm/test/Transforms/FunctionAttrs/willreturn.ll @@ -2,7 +2,7 @@ ; RUN: opt -function-attrs -S %s | FileCheck %s define void @mustprogress_readnone() mustprogress { -; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CHECK-LABEL: @mustprogress_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] @@ -17,7 +17,7 @@ while.body: } define i32 @mustprogress_load(ptr %ptr) mustprogress { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse noreturn nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @mustprogress_load( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] @@ -34,7 +34,7 @@ while.body: } define void @mustprogress_store(ptr %ptr) mustprogress { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse noreturn nosync nounwind writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind memory(argmem: write) ; CHECK-LABEL: @mustprogress_store( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] @@ -63,7 +63,7 @@ define void @mustprogress_call_unknown_fn() mustprogress { } define i32 
@mustprogress_call_known_functions(ptr %ptr) mustprogress { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse noreturn nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @mustprogress_call_known_functions( ; CHECK-NEXT: call void @mustprogress_readnone() ; CHECK-NEXT: [[R:%.*]] = call i32 @mustprogress_load(ptr [[PTR:%.*]]) @@ -77,7 +77,7 @@ define i32 @mustprogress_call_known_functions(ptr %ptr) mustprogress { declare i32 @__gxx_personality_v0(...) define i64 @mustprogress_mayunwind() mustprogress personality ptr @__gxx_personality_v0 { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: @mustprogress_mayunwind( ; CHECK-NEXT: [[A:%.*]] = invoke i64 @fn_noread() ; CHECK-NEXT: to label [[A:%.*]] unwind label [[B:%.*]] @@ -141,7 +141,7 @@ define void @willreturn_non_returning_function(i1 %c, ptr %p) { ; Infinite loop without mustprogress, will not return. define void @willreturn_loop() { -; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_loop( ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -156,7 +156,7 @@ loop: ; Finite loop. Could be willreturn but not detected. ; FIXME define void @willreturn_finite_loop() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_finite_loop( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -183,7 +183,7 @@ end: ; Infinite recursion without mustprogress, will not return. 
define void @willreturn_recursion() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_recursion( ; CHECK-NEXT: tail call void @willreturn_recursion() ; CHECK-NEXT: ret void @@ -194,7 +194,7 @@ define void @willreturn_recursion() { ; Irreducible infinite loop, will not return. define void @willreturn_irreducible(i1 %c) { -; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_irreducible( ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll index 40ca265990487..0c8ec05223b36 100644 --- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=function-attrs -S | FileCheck %s define void @nouses-argworn-funrn(ptr writeonly %.aaa) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nouses-argworn-funrn ; CHECK-SAME: (ptr nocapture readnone [[DOTAAA:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: nouses-argworn-funrn_entry: @@ -13,7 +13,7 @@ nouses-argworn-funrn_entry: } define void @nouses-argworn-funro(ptr writeonly %.aaa, ptr %.bbb) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@nouses-argworn-funro ; CHECK-SAME: (ptr nocapture readnone [[DOTAAA:%.*]], ptr nocapture readonly [[DOTBBB:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: nouses-argworn-funro_entry: @@ -30,7 +30,7 @@ 
nouses-argworn-funro_entry: @d-ccc = internal global %_type_of_d-ccc <{ ptr null, i8 1, i8 13, i8 0, i8 -127 }>, align 8 define void @nouses-argworn-funwo(ptr writeonly %.aaa) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: define {{[^@]+}}@nouses-argworn-funwo ; CHECK-SAME: (ptr nocapture readnone [[DOTAAA:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: nouses-argworn-funwo_entry: @@ -43,7 +43,7 @@ nouses-argworn-funwo_entry: } define void @test_store(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test_store ; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: store i8 0, ptr [[P]], align 1 @@ -55,7 +55,7 @@ define void @test_store(ptr %p) { @G = external global ptr define i8 @test_store_capture(ptr %p) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: read, inaccessiblemem: none) ; CHECK-LABEL: define {{[^@]+}}@test_store_capture ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: store ptr [[P]], ptr @G, align 8 @@ -70,7 +70,7 @@ define i8 @test_store_capture(ptr %p) { } define void @test_addressing(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test_addressing ; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 8 @@ -83,7 
+83,7 @@ define void @test_addressing(ptr %p) { } define void @test_readwrite(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test_readwrite ; CHECK-SAME: (ptr nocapture [[P:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[P]], align 1 @@ -96,7 +96,7 @@ define void @test_readwrite(ptr %p) { } define void @test_volatile(ptr %p) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test_volatile ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: store volatile i8 0, ptr [[P]], align 1 @@ -107,7 +107,7 @@ define void @test_volatile(ptr %p) { } define void @test_atomicrmw(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test_atomicrmw ; CHECK-SAME: (ptr nocapture [[P:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i8 0 seq_cst, align 1 @@ -134,7 +134,7 @@ declare void @direct2_callee(ptr %p) writeonly ; writeonly w/o nocapture is not enough define void @direct2(ptr %p) { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: define {{[^@]+}}@direct2 ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; CHECK-NEXT: call void @direct2_callee(ptr [[P]]) @@ -146,9 +146,9 @@ define void @direct2(ptr %p) { } define void @direct2b(ptr %p) { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: define {{[^@]+}}@direct2b -; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR8]] { +; CHECK-SAME: (ptr 
nocapture [[P:%.*]]) #[[ATTR8]] { ; CHECK-NEXT: call void @direct2_callee(ptr nocapture [[P]]) ; CHECK-NEXT: ret void ; @@ -209,9 +209,9 @@ define void @fptr_test2(ptr %p, ptr %f) { } define void @fptr_test3(ptr %p, ptr %f) { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: define {{[^@]+}}@fptr_test3 -; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR8]] { +; CHECK-SAME: (ptr nocapture [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR8]] { ; CHECK-NEXT: call void [[F]](ptr nocapture [[P]]) #[[ATTR8]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll index 8923fec97a63d..526076cd9a85b 100644 --- a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll +++ b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll @@ -115,5 +115,5 @@ define internal void @ctor8() { declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. diff --git a/llvm/test/Transforms/GlobalOpt/pr54572.ll b/llvm/test/Transforms/GlobalOpt/pr54572.ll index e4f3264b5871e..83640452ed1c4 100644 --- a/llvm/test/Transforms/GlobalOpt/pr54572.ll +++ b/llvm/test/Transforms/GlobalOpt/pr54572.ll @@ -19,5 +19,5 @@ define void @test() { ret void } ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. 
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 5e795b9baf07a..e5c61c2f3e479 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -1081,25 +1081,25 @@ declare void @memset_pattern8(i8*, i8*, i64) declare void @memset_pattern16(i8*, i8*, i64) ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn } -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn writeonly } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn memory(write) } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCZEROED_ALLOCSIZE01_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind readonly willreturn } -; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { argmemonly mustprogress nofree nounwind willreturn } -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind readonly } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_FREE_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE1_FAMILY_MALLOC]] = 
{ mustprogress nofree nounwind willreturn allockind("alloc,uninitialized,aligned") allocsize(1) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCZEROED_ALLOCSIZE01_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind willreturn memory(read) } +; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind memory(read) } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_FREE_FAMILY_MALLOC]] = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" } ; CHECK-DAG: attributes [[NOFREE_WILLRETURN]] = { mustprogress nofree willreturn } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly mustprogress nofree nounwind readonly willreturn } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind willreturn memory(argmem: read) } ; CHECK-DAG: attributes [[NOFREE]] = { nofree } -; CHECK-DAG: attributes 
[[ARGMEMONLY_NOFREE_NOUNWIND]] = { argmemonly nofree nounwind } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("realloc") allocsize(1) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nofree nounwind willreturn "alloc-family"="malloc" } +; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND]] = { nofree nounwind memory(argmem: readwrite) } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC]] = { mustprogress nounwind willreturn allockind("realloc") allocsize(1) memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" } -; CHECK-NVPTX-DAG: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nosync nounwind readnone } +; CHECK-NVPTX-DAG: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nosync nounwind memory(none) } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("realloc") allocsize(1) 
"alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC]] = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC]] = { mustprogress nounwind willreturn allockind("realloc") allocsize(1) memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) memory(inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } diff --git a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll index 9b90f59d3602b..12a59ea02cc88 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll @@ -52,5 +52,5 @@ attributes #1 = { nounwind readnone speculatable } !28 = !DILocation(line: 9, column: 18, scope: !2) !29 = !DILocation(line: 10, column: 1, scope: !2) -; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn } +; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: 
none) } ; CHECK-NOT: foo.coefficient1 diff --git a/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll b/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll index bf16bc80242a7..c1a5f11cf94ab 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll @@ -5,4 +5,4 @@ ; CHECK: declare double @acos(double) [[NOFREE_NOUNWIND_WILLRETURN_READNONE:#[0-9]+]] declare double @acos(double) readonly -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_READNONE]] = { mustprogress nofree nosync nounwind readnone willreturn } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_READNONE]] = { mustprogress nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Transforms/Inline/cgscc-update.ll b/llvm/test/Transforms/Inline/cgscc-update.ll index 5b39d5914f564..b5c30360548c1 100644 --- a/llvm/test/Transforms/Inline/cgscc-update.ll +++ b/llvm/test/Transforms/Inline/cgscc-update.ll @@ -9,8 +9,8 @@ ; CHECK: declare void @unknown() declare void @unknown() -; Basic correctness check: this should get annotated as readnone. -; CHECK: Function Attrs: nounwind readnone +; Basic correctness check: this should get annotated as memory(none). +; CHECK: Function Attrs: nounwind memory(none) ; CHECK-NEXT: declare void @readnone() declare void @readnone() readnone nounwind @@ -26,8 +26,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test1_g() define void @test1_g() noinline { entry: @@ -35,8 +35,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. 
-; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test1_h() define void @test1_h() noinline { entry: @@ -58,8 +58,8 @@ entry: ret void()* @test2_h } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test2_g() define void @test2_g() noinline { entry: @@ -68,8 +68,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test2_h() define void @test2_h() noinline { entry: @@ -151,8 +151,8 @@ exit: ; interesting call graph update for the new call edge. Eventually, we still ; form a new SCC and should use that can deduce precise function attrs. -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test4_f1() define void @test4_f1() noinline { entry: @@ -174,8 +174,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. 
+; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test4_h() define void @test4_h() noinline { entry: diff --git a/llvm/test/Transforms/Inline/inline_invoke.ll b/llvm/test/Transforms/Inline/inline_invoke.ll index 59df19dc8a0d5..8dfb170b14804 100644 --- a/llvm/test/Transforms/Inline/inline_invoke.ll +++ b/llvm/test/Transforms/Inline/inline_invoke.ll @@ -343,7 +343,7 @@ terminate: ; CHECK-NEXT: call void @_ZSt9terminatev() ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { nounwind readnone } +; CHECK: attributes #1 = { nounwind memory(none) } ; CHECK: attributes #2 = { ssp uwtable } -; CHECK: attributes #3 = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #4 = { noreturn nounwind } diff --git a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll index 2a36dadaba927..56491c5efa4f4 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll @@ -65,6 +65,6 @@ entry: declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -; CHECK: attributes #0 = { nounwind readnone ssp } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #0 = { nounwind ssp memory(none) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll b/llvm/test/Transforms/InstCombine/stpncpy-1.ll index 6501ca4c6ff8b..9f9442705d126 100644 --- a/llvm/test/Transforms/InstCombine/stpncpy-1.ll +++ 
b/llvm/test/Transforms/InstCombine/stpncpy-1.ll @@ -448,6 +448,6 @@ define void @call_stpncpy_s(ptr %dst, ptr %src, i64 %n) { ret void } ;. -; ANY: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; ANY: attributes #[[ATTR1:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; ANY: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; ANY: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll index 972ed74988bd2..479be9d87a01a 100644 --- a/llvm/test/Transforms/LICM/scalar-promote.ll +++ b/llvm/test/Transforms/LICM/scalar-promote.ll @@ -600,7 +600,7 @@ Out: } define i8 @test_hoistable_existing_load_sinkable_store_writeonly(ptr dereferenceable(8) %ptr, i8 %start) writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_hoistable_existing_load_sinkable_store_writeonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i8, ptr [[PTR:%.*]], align 1 @@ -641,7 +641,7 @@ exit: ; Test case for PR51248. define void @test_sink_store_only() writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_only( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] @@ -678,7 +678,7 @@ exit: } define void @test_sink_store_to_local_object_only_loop_must_execute() writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_must_execute( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -719,7 +719,7 @@ exit: ; The store in the loop may not execute, so we need to introduce a load in the ; pre-header. Make sure the writeonly attribute is dropped. 
define void @test_sink_store_to_local_object_only_loop_may_not_execute(i8 %n) writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_may_not_execute( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -761,7 +761,7 @@ exit: declare dereferenceable(8) noalias ptr @alloc_writeonly() writeonly define void @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1(i8 %n) writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = call noalias dereferenceable(8) ptr @alloc_writeonly() @@ -801,7 +801,7 @@ exit: } define void @test_sink_store_only_no_phi_needed() writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_only_no_phi_needed( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/LICM/strlen.ll b/llvm/test/Transforms/LICM/strlen.ll index fc47660aaafb7..e4e72b253e05e 100644 --- a/llvm/test/Transforms/LICM/strlen.ll +++ b/llvm/test/Transforms/LICM/strlen.ll @@ -13,7 +13,7 @@ loop: } ; CHECK: declare i64 @strlen(ptr nocapture) #0 -; CHECK: attributes #0 = { argmemonly mustprogress nofree nounwind readonly willreturn } +; CHECK: attributes #0 = { mustprogress nofree nounwind willreturn memory(argmem: read) } declare i64 @strlen(ptr) diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll index 650afc820b729..e5dec5d06eca3 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll @@ -85,4 +85,4 @@ declare void @llvm.matrix.column.major.store.v10f64.i64(<10 x double>, double*, ; CHECK: declare void 
@llvm.matrix.column.major.store.v6f64.i64(<6 x double>, double* nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg) #0 ; CHECK: declare void @llvm.matrix.column.major.store.v10f64.i64(<10 x double>, double* nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg) #0 -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/ObjCARC/basic.ll b/llvm/test/Transforms/ObjCARC/basic.ll index 62ce2cffaf115..8178d48fa8c85 100644 --- a/llvm/test/Transforms/ObjCARC/basic.ll +++ b/llvm/test/Transforms/ObjCARC/basic.ll @@ -3073,5 +3073,5 @@ define void @test68(i8* %a, i8* %b) { !5 = !{i32 2, !"Debug Info Version", i32 3} ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: ![[RELEASE]] = !{} diff --git a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll index 03257d2e5429a..a31be00210286 100644 --- a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll +++ b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll @@ -105,7 +105,7 @@ declare void @NSLog(i8*, ...) 
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { ssp uwtable } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #2 = { nonlazybind } ; CHECK: attributes [[NUW]] = { nounwind } ; CHECK: attributes #4 = { noinline ssp uwtable } diff --git a/llvm/test/Transforms/ObjCARC/nested.ll b/llvm/test/Transforms/ObjCARC/nested.ll index e1c46a0861d42..cc947717de46f 100644 --- a/llvm/test/Transforms/ObjCARC/nested.ll +++ b/llvm/test/Transforms/ObjCARC/nested.ll @@ -821,5 +821,5 @@ entry: ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CHECK: attributes #2 = { nonlazybind } diff --git a/llvm/test/Transforms/ObjCARC/rle-s2l.ll b/llvm/test/Transforms/ObjCARC/rle-s2l.ll index e13ff35e91f28..d62147875a558 100644 --- a/llvm/test/Transforms/ObjCARC/rle-s2l.ll +++ b/llvm/test/Transforms/ObjCARC/rle-s2l.ll @@ -135,4 +135,4 @@ define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) { } ; CHECK: attributes #0 = { nounwind } -; CHECK: attributes [[RO]] = { readonly } +; CHECK: attributes [[RO]] = { memory(read) } diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index 9d48cd1c7bca5..7ad863c38ef07 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -1211,67 +1211,67 @@ attributes #0 = { noinline cold } ; CHECK: ; Function Attrs: cold convergent noinline nounwind ; CHECK-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function 
Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_dynamic(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_nested(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_max_active_levels(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: 
declare dso_local i32 @omp_get_nested() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn 
memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) ; OPTIMISTIC-NOT: Function Attrs @@ -1313,7 +1313,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare dso_local double @omp_get_wtime() ; OPTIMISTIC-NOT: Function Attrs @@ -1340,7 +1340,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_team_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; 
Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() ; OPTIMISTIC-NOT: Function Attrs @@ -1370,25 +1370,25 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) ; OPTIMISTIC-NOT: 
Function Attrs @@ -1433,10 +1433,10 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: nounwind @@ -1445,25 +1445,25 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: 
inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: convergent nounwind @@ -1499,43 +1499,43 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: 
inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture 
nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; 
OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind @@ -1556,52 +1556,52 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, 
inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) -; OPTIMISTIC: 
; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) -; 
OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn @@ -1619,7 +1619,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t* nocapture nofree readonly, i32, i32, i32) ; OPTIMISTIC: ; Function Attrs: nounwind @@ -1673,7 +1673,7 @@ attributes #0 
= { noinline cold } ; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index ec2f3e219d21d..108499631d196 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -72,7 +72,7 @@ define internal void @.omp_outlined.willreturn.0(i32* noalias %.global_tid., i32 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @readonly() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 @@ -194,9 +194,9 @@ entry: define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
{ ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() #[[ATTR4]] +; CHECK-NEXT: call void @readonly() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..0 @@ -338,7 +338,7 @@ define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9:[0-9]+]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: @@ -466,7 +466,7 @@ define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 ; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR9]] ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; 
CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 @@ -541,7 +541,7 @@ define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias ; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR14:[0-9]+]] ; CHECK-NEXT: store i32 1, i32* [[A1]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** ; CHECK-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 27f4e3e608ec9..3e0abd5deadbd 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -83,7 +83,7 @@ define internal void @bar() { ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]] +; CHECK-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-NEXT: ret void ; @@ -91,7 +91,7 @@ define internal void @bar() { ; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]] +; CHECK-DISABLED-NEXT: call void @share(i8* nofree 
[[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] ; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-DISABLED-NEXT: ret void ; @@ -257,19 +257,17 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ;. ; CHECK: attributes #[[ATTR0]] = { nounwind } ; CHECK: attributes #[[ATTR1]] = { nosync nounwind } -; CHECK: attributes #[[ATTR2]] = { nounwind readnone } -; CHECK: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind writeonly } +; CHECK: attributes #[[ATTR2]] = { nounwind memory(none) } +; CHECK: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR6]] = { nosync nounwind writeonly } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { nounwind } ; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind } -; CHECK-DISABLED: attributes #[[ATTR2]] = { nounwind readnone } -; CHECK-DISABLED: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind writeonly } +; CHECK-DISABLED: attributes #[[ATTR2]] = { nounwind memory(none) } +; CHECK-DISABLED: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) } ; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK-DISABLED: attributes #[[ATTR6]] = { nosync nounwind writeonly } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c") diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index df2cd952cd278..9651c0e0ad471 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -150,8 +150,8 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR7:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -163,14 +163,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] ; CHECK: master1: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] ; 
CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[B0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]] ; CHECK: master2: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -185,15 +185,15 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: ; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]] -; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR7]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR8]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: nofree norecurse nounwind writeonly +; CHECK: Function Attrs: nofree norecurse nounwind memory(write) ; CHECK-LABEL: define {{[^@]+}}@use.internalized ; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -208,6 +208,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: ret void ; ; +; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read) ; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared ; CHECK-SAME: (i64 [[TMP0:%.*]]) 
#[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[L:%.*]] = load i32, i32* @offset, align 4 @@ -216,14 +217,12 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind writeonly } -; CHECK: attributes #[[ATTR2]] = { nosync nounwind readonly allocsize(0) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind memory(write) } +; CHECK: attributes #[[ATTR2]] = { nosync nounwind allocsize(0) memory(read) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR6]] = { nounwind readonly } -; CHECK: attributes #[[ATTR7]] = { nounwind writeonly } -; CHECK: attributes #[[ATTR8]] = { nounwind } +; CHECK: attributes #[[ATTR6]] = { nounwind } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c") diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 0841600da2456..c707256b12c96 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -702,9 +702,9 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) +; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* +; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8* ; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* ; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -716,18 +716,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* 
bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 +; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* +; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32* ; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: @@ -738,18 +738,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP1:%.*]] 
= bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* +; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8* ; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* ; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -761,18 +761,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 +; NVPTX-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* +; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32* ; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: @@ -783,9 +783,9 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: 
[[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; @@ -2426,7 +2426,7 @@ attributes #11 = { convergent } ; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU: attributes #[[ATTR8]] = { convergent } -; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. @@ -2439,7 +2439,7 @@ attributes #11 = { convergent } ; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX: attributes #[[ATTR8]] = { convergent } -; NVPTX: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. 
@@ -2452,7 +2452,7 @@ attributes #11 = { convergent } ; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent } -; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. @@ -2465,7 +2465,7 @@ attributes #11 = { convergent } ; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent } -; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index 6c7fcd30f3de6..3d58485871918 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -146,7 +146,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } !11 = !{!"Simple C/C++ TBAA"} ;. 
; CHECK: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -; CHECK: attributes #[[ATTR1]] = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +; CHECK: attributes #[[ATTR1]] = { alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } ; CHECK: attributes #[[ATTR2]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } ; CHECK: attributes #[[ATTR3]] = { nounwind } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { alwaysinline } diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index fa5a2ee8c7f73..411d73a066c02 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -396,11 +396,11 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind readonly willreturn "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; CHECK: attributes #[[ATTR6]] = { nounwind } -; CHECK: attributes #[[ATTR7:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind } ; CHECK: attributes #[[ATTR9]] = { nounwind willreturn } ; CHECK: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" } @@ -408,11 +408,11 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { alwaysinline } ; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind readonly willreturn "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind 
willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind } -; CHECK-DISABLED: attributes #[[ATTR7:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind } ; CHECK-DISABLED: attributes #[[ATTR9]] = { nounwind willreturn } ; CHECK-DISABLED: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" } diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll index 5896d4166e2cb..e49de449e5113 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -155,7 +155,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @generic_helper() #[[ATTR5]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -168,7 +168,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: 
br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR5]] ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: @@ -194,7 +194,7 @@ define internal void @spmd_helper() #1 { ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) @@ -204,7 +204,7 @@ define internal void @spmd_helper() #1 { ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]] ; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), 
i8* @__omp_outlined___wrapper.ID, i8** [[TMP1]], i64 0) @@ -226,7 +226,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -234,7 +234,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: @@ -328,13 +328,13 @@ define internal void @generic_helper() #1 { ; CHECK-LABEL: define {{[^@]+}}@generic_helper ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR4]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: @@ -371,19 +371,17 @@ attributes #5 = { convergent } ; CHECK: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR2]] = { nounwind } ; CHECK: attributes 
#[[ATTR3:[0-9]+]] = { alwaysinline } -; CHECK: attributes #[[ATTR4]] = { convergent noinline nounwind writeonly "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR5]] = { convergent nounwind } -; CHECK: attributes #[[ATTR6]] = { convergent nounwind writeonly } -; CHECK: attributes #[[ATTR7]] = { convergent } +; CHECK: attributes #[[ATTR6]] = { convergent } ;. ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { nounwind } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nounwind writeonly "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5]] = { 
convergent nounwind } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent nounwind writeonly } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { convergent } +; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll b/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll index b0d6531f05342..ce8d977b8d4a5 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -S -rewrite-statepoints-for-gc | FileCheck %s -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) ; CHECK: declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll b/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll index 1580b09f25882..1eb6cefc4dcd8 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll @@ -20,6 +20,6 @@ define void @test1(i8 addrspace(1)* %arg) gc "statepoint-example" { attributes #1 = { norecurse noimplicitfloat } ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(none) } ; CHECK: attributes #[[ATTR1]] = { noimplicitfloat norecurse } ;. 
diff --git a/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll b/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll index 015c9afb3720e..22726e0cac1f1 100644 --- a/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll +++ b/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals ; RUN: opt -passes=ipsccp -S %s | FileCheck %s ; Test cases to ensure argmemonly/inaccessiblemem_or_argmemonly attributes are @@ -10,8 +10,11 @@ ; Here the pointer argument %arg will be replaced by a constant. We need to ; drop argmemonly. +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0 +;. define internal void @ptrarg.1(ptr %arg, i32 %val) argmemonly nounwind { -; CHECK: Function Attrs: nounwind +; CHECK: Function Attrs: nounwind memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @ptrarg.1( ; CHECK-NEXT: store i32 10, ptr @g, align 4 ; CHECK-NEXT: ret void @@ -37,7 +40,7 @@ define i32 @caller.1(i32 %n) { ; Here only the non-pointer argument %val is replaced, no need ; to drop the argmemonly attribute. define internal void @ptrarg.2(ptr %arg, i32 %val) argmemonly nounwind { -; CHECK: Function Attrs: argmemonly nounwind +; CHECK: Function Attrs: nounwind memory(argmem: readwrite) ; CHECK-LABEL: @ptrarg.2( ; CHECK-NEXT: store i32 10, ptr [[ARG:%.*]], align 4 ; CHECK-NEXT: ret void @@ -59,7 +62,7 @@ define void @caller.2(ptr %ptr) { ; Here the pointer argument %arg will be replaced by a constant. We need to ; drop inaccessiblemem_or_argmemonly. 
define internal void @ptrarg.3(ptr %arg, i32 %val) inaccessiblemem_or_argmemonly nounwind { -; CHECK: Function Attrs: nounwind +; CHECK: Function Attrs: nounwind memory(readwrite) ; CHECK-LABEL: @ptrarg.3( ; CHECK-NEXT: store i32 10, ptr @g, align 4 ; CHECK-NEXT: ret void @@ -85,7 +88,7 @@ define i32 @caller.3(i32 %n) { ; Here only the non-pointer argument %val is replaced, no need ; to drop the inaccessiblemem_or_argmemonly attribute. define internal void @ptrarg.4(ptr %arg, i32 %val) inaccessiblemem_or_argmemonly nounwind { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nounwind +; CHECK: Function Attrs: nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: @ptrarg.4( ; CHECK-NEXT: store i32 10, ptr [[ARG:%.*]], align 4 ; CHECK-NEXT: ret void @@ -107,7 +110,7 @@ define void @caller.4(ptr %ptr) { ; Here the pointer argument %arg will be replaced by a constant. We need to ; drop inaccessiblemem_or_argmemonly. define internal void @ptrarg.5(ptr %arg, i32 %val) argmemonly inaccessiblemem_or_argmemonly nounwind { -; CHECK: Function Attrs: nounwind +; CHECK: Function Attrs: nounwind memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @ptrarg.5( ; CHECK-NEXT: store i32 10, ptr @g, align 4 ; CHECK-NEXT: ret void @@ -143,10 +146,10 @@ define internal void @ptrarg.6.cs.attributes(ptr %arg, i32 %val) { define i32 @caller.6.cs.attributes(i32 %n) { ; CHECK-LABEL: @caller.6.cs.attributes( ; CHECK-NEXT: store i32 1, ptr @g, align 4 -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) 
#[[ATTR0]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: [[G_VAL:%.*]] = load i32, ptr @g, align 4 ; CHECK-NEXT: ret i32 [[G_VAL]] ; @@ -159,4 +162,10 @@ define i32 @caller.6.cs.attributes(i32 %n) { ret i32 %g.val } -; CHECK: [[NOUNWIND]] = { nounwind } +;. +; CHECK: attributes #[[ATTR0]] = { nounwind memory(readwrite, inaccessiblemem: none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR2]] = { nounwind memory(readwrite) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #[[ATTR4]] = { nounwind } +;. diff --git a/llvm/test/Transforms/SCCP/remove-call-inst.ll b/llvm/test/Transforms/SCCP/remove-call-inst.ll index b4ab128bffa28..6881b82a938fb 100644 --- a/llvm/test/Transforms/SCCP/remove-call-inst.ll +++ b/llvm/test/Transforms/SCCP/remove-call-inst.ll @@ -36,4 +36,4 @@ return: } ; CHECK: attributes #0 = { noreturn nounwind } -; CHECK: attributes #1 = { nounwind readnone willreturn } +; CHECK: attributes #1 = { nounwind willreturn memory(none) } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll index f39b03c01dc6a..cb868d440b69d 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -64,7 +64,7 @@ entry: ret void } -; CHECK-IL: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn +; CHECK-IL: Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-IL-NEXT: declare void @llvm.pseudoprobe(i64, i64, i32, i64) ; CHECK-IL: ![[#FOO:]] = distinct !DISubprogram(name: "foo" diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll 
index b1802ba8022ba..bfbba255f3c6f 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll @@ -68,7 +68,7 @@ declare void @destructor() declare dso_local i32 @__gxx_personality_v0(...) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "", directory: "/") diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll index 20ecd03476c63..7b0359aa1d71a 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll @@ -2475,5 +2475,5 @@ declare dso_local i32 @__gxx_personality_v0(...) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } ; CHECK: attributes #[[ATTR1]] = { nomerge } -; CHECK: attributes #[[ATTR2]] = { readnone } +; CHECK: attributes #[[ATTR2]] = { memory(none) } ;. diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll index fc200e041125e..7965c8d23cd00 100644 --- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll +++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll @@ -1133,7 +1133,7 @@ exit: ;. 
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind uwtable } ; CHECK: attributes #[[ATTR1]] = { nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { noredzone nounwind readnone ssp } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { noredzone nounwind ssp memory(none) } ;. ; CHECK: [[PROF0]] = !{!"branch_weights", i32 5, i32 11} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 3} diff --git a/llvm/test/Verifier/fp-intrinsics.ll b/llvm/test/Verifier/fp-intrinsics.ll index 744c215ce23e1..670acc58550e8 100644 --- a/llvm/test/Verifier/fp-intrinsics.ll +++ b/llvm/test/Verifier/fp-intrinsics.ll @@ -12,7 +12,7 @@ declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadat ; attached to the FP intrinsic. ; CHECK1: declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #[[ATTR:[0-9]+]] ; CHECK1: declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) #[[ATTR]] -; CHECK1: attributes #[[ATTR]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK1: attributes #[[ATTR]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; Note: FP exceptions aren't usually caught through normal unwind mechanisms, ; but we may want to revisit this for asynchronous exception handling. 
define double @f1(double %a, double %b) #0 { diff --git a/llvm/test/Verifier/writeonly.ll b/llvm/test/Verifier/writeonly.ll deleted file mode 100644 index 0eeaebbc3a889..0000000000000 --- a/llvm/test/Verifier/writeonly.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s - -declare void @a() readnone writeonly -; CHECK: Attributes {{.*}} are incompatible - -declare void @b() readonly writeonly -; CHECK: Attributes {{.*}} are incompatible - -declare void @c(i32* readnone writeonly %p) -; CHECK: Attributes {{.*}} are incompatible - -declare void @d(i32* readonly writeonly %p) -; CHECK: Attributes {{.*}} are incompatible diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected index 4fff2d2836f15..0177b6f0306ff 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected @@ -6,7 +6,7 @@ %struct.ST = type { i32, double, %struct.RT } define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp { -; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize readnone ssp willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (%struct.ST* nofree readnone [[S:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected index 3f56cfed2b925..345a5223fedb4 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected +++ 
b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected @@ -249,8 +249,8 @@ attributes #3 = { nounwind } !61 = !{!"branch_weights", i32 1, i32 1048575} ;. ; CHECK: attributes #[[ATTR0]] = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR3]] = { nounwind } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) diff --git a/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll b/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll index 3eb43c344ea8e..2aec90b086f7f 100644 --- a/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll +++ b/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll @@ -26,7 +26,7 @@ define i32 @t(i32 %a) { ; CHECK-ALL: declare i32 @llvm.uadd.sat.i32(i32, i32) #0 declare i32 @llvm.uadd.sat.i32(i32, i32) #0 -; CHECK-ALL: attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK-ALL: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK-INTERESTINGNESS: attributes #1 = { ; CHECK-INTERESTINGNESS-SAME: "arg4" diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index c80bfa9762663..65cb48a7ab038 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -731,11 +731,11 @@ TEST(InstructionsTest, CloneCall) { // Test cloning an attribute. 
{ AttrBuilder AB(C); - AB.addAttribute(Attribute::ReadOnly); + AB.addAttribute(Attribute::NoUnwind); Call->setAttributes( AttributeList::get(C, AttributeList::FunctionIndex, AB)); std::unique_ptr Clone(cast(Call->clone())); - EXPECT_TRUE(Clone->onlyReadsMemory()); + EXPECT_TRUE(Clone->doesNotThrow()); } } diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index ec3023f843db4..a6d7e2ce949bf 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -776,49 +776,53 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, case CodeGenIntrinsic::NoMem: if (Intrinsic.hasSideEffects) break; - OS << " Attribute::get(C, Attribute::ReadNone),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::none()),\n"; break; case CodeGenIntrinsic::ReadArgMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; - OS << " Attribute::get(C, Attribute::ArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::argMemOnly(ModRefInfo::Ref)),\n"; break; case CodeGenIntrinsic::ReadMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::readOnly()),\n"; break; case CodeGenIntrinsic::ReadInaccessibleMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; - OS << " Attribute::get(C, Attribute::InaccessibleMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleMemOnly(ModRefInfo::Ref)),\n"; break; case CodeGenIntrinsic::ReadInaccessibleMemOrArgMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; - OS << " Attribute::get(C, " - << "Attribute::InaccessibleMemOrArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleOrArgMemOnly(ModRefInfo::Ref)),\n"; + break; break; case CodeGenIntrinsic::WriteArgMem: - OS << " Attribute::get(C, Attribute::WriteOnly),\n"; - OS << " Attribute::get(C, 
Attribute::ArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::argMemOnly(ModRefInfo::Mod)),\n"; break; case CodeGenIntrinsic::WriteMem: - OS << " Attribute::get(C, Attribute::WriteOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::writeOnly()),\n"; break; case CodeGenIntrinsic::WriteInaccessibleMem: - OS << " Attribute::get(C, Attribute::WriteOnly),\n"; - OS << " Attribute::get(C, Attribute::InaccessibleMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleMemOnly(ModRefInfo::Mod)),\n"; break; case CodeGenIntrinsic::WriteInaccessibleMemOrArgMem: - OS << " Attribute::get(C, Attribute::WriteOnly),\n"; - OS << " Attribute::get(C, " - << "Attribute::InaccessibleMemOrArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleOrArgMemOnly(ModRefInfo::Mod)),\n"; break; case CodeGenIntrinsic::ReadWriteArgMem: - OS << " Attribute::get(C, Attribute::ArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::argMemOnly(ModRefInfo::ModRef)),\n"; break; case CodeGenIntrinsic::ReadWriteInaccessibleMem: - OS << " Attribute::get(C, Attribute::InaccessibleMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)),\n"; break; case CodeGenIntrinsic::ReadWriteInaccessibleMemOrArgMem: - OS << " Attribute::get(C, " - << "Attribute::InaccessibleMemOrArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleOrArgMemOnly(ModRefInfo::ModRef)),\n"; break; case CodeGenIntrinsic::ReadWriteMem: break; diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 127a7404431ed..6ed9d4aac4b76 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -327,7 +327,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): UNUSED_NOTE = 'NOTE: 
These prefixes are unused and the list is autogenerated. Do not add tests below this line:' OPT_FUNCTION_RE = re.compile( - r'^(\s*;\s*Function\sAttrs:\s(?P[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P[\w.$-]+?)\s*' + r'^(\s*;\s*Function\sAttrs:\s(?P[\w\s():,]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P[\w.$-]+?)\s*' r'(?P\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P.*?)^\}$', flags=(re.M | re.S)) diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 2c4f54c765352..0ca8bb02c2bb1 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -2009,5 +2009,5 @@ llvm.func @vararg_function(%arg0: i32, ...) { // Function attributes: readnone // CHECK: declare void @readnone_function() #[[ATTR:[0-9]+]] -// CHECK: attributes #[[ATTR]] = { readnone } +// CHECK: attributes #[[ATTR]] = { memory(none) } llvm.func @readnone_function() attributes {llvm.readnone} From 389cf0adcb0ba2df347e49dcf15f993a4593bdc6 Mon Sep 17 00:00:00 2001 From: wanglei Date: Fri, 4 Nov 2022 17:50:31 +0800 Subject: [PATCH 235/516] [LoongArch] Change the name of LoongArchPreRAExpandPseudo pass By convention, pass names use lowercase letters. LoongArch-prera-expand-pseudo -> loongarch-prera-expand-pseudo. 
Reviewed By: SixWeining Differential Revision: https://reviews.llvm.org/D137402 --- llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index 1961444be8cfb..e49c162cdb781 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -241,7 +241,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( } // end namespace -INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "LoongArch-prera-expand-pseudo", +INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false) namespace llvm { From a8d7ad70aac1148ce7c3458194c322c9536b1409 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Wed, 2 Nov 2022 19:08:02 +0530 Subject: [PATCH 236/516] [AMDGPU] Skip stack-arg dbg objects while fixing the dead frame indices Both the SGPR->VGPR and the VGPR->AGPR spilling code fix up the spill frame indices referenced by debug instructions so that those frame indices can be entirely removed. While doing this fixup, we should skip the debug objects for stack arguments when using the FI to index the bitvector that tracks the spill indices being processed. Stack arguments have negative frame indices, so indexing the bitvector with them would crash. 
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D137277 --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 1 + llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 1 + ...fi-skip-processing-stack-arg-dbg-value.mir | 56 +++++++++++++++++++ ...fi-skip-processing-stack-arg-dbg-value.mir | 56 +++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir create mode 100644 llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index fe5090f9c01c2..c785cfdaaef82 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1188,6 +1188,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // correct register value. But not sure the register value alone is for (MachineInstr &MI : MBB) { if (MI.isDebugValue() && MI.getOperand(0).isFI() && + !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && SpillFIs[MI.getOperand(0).getIndex()]) { MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); } diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 251ac626e21cb..345395db13fb4 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -319,6 +319,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { // adequate to lower the DIExpression. It should be worked out later. 
for (MachineInstr &MI : MBB) { if (MI.isDebugValue() && MI.getOperand(0).isFI() && + !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && SpillFIs[MI.getOperand(0).getIndex()]) { MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); } diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir new file mode 100644 index 0000000000000..4694810379fe0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir @@ -0,0 +1,56 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s + +# After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, we replace the dead frame index in the DBG_VALUE instruction with reg 0. +# Skip looking for frame indices in the debug value instruction for incoming arguments passed via stack. The test would crash otherwise. +# It is safe to skip the fixed stack objects as they will never become the spill objects. + +--- | + define amdgpu_kernel void @test() { ret void } + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4) + !1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6) + !2 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 1, type: !3, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !5) + !3 = !DISubroutineType(types: !4) + !4 = !{null} + !5 = !{!1} + !6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 32) + !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !8 = !DIExpression() + !9 = !DILocation(line: 10, column: 9, scope: !2) + +... 
+--- +name: test +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +fixedStack: + - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default } +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + maxKernArgAlign: 4 + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + kernargSegmentPtr: { reg: '$sgpr6_sgpr7' } + workGroupIDX: { reg: '$sgpr8' } + privateSegmentWaveByteOffset: { reg: '$sgpr9' } +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0: + ; CHECK: DBG_VALUE $noreg, 0 + bb.0: + renamable $sgpr10 = IMPLICIT_DEF + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + DBG_VALUE %fixed-stack.0, 0, !1, !8, debug-location !9 + + bb.1: + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir new file mode 100644 index 0000000000000..2058a94b0614a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir @@ -0,0 +1,56 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-vgpr-to-agpr=true -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck %s + +# After handling the VGPR spill to AGPR copy, we replace the dead frame index in the DBG_VALUE instruction with reg 0. +# Skip looking for frame indices in the debug value instruction for incoming arguments passed via stack. The test would crash otherwise. 
+# It is safe to skip the fixed stack objects as they will never become the spill objects. + +--- | + define amdgpu_kernel void @test() { ret void } + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4) + !1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6) + !2 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 1, type: !3, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !5) + !3 = !DISubroutineType(types: !4) + !4 = !{null} + !5 = !{!1} + !6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 32) + !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !8 = !DIExpression() + !9 = !DILocation(line: 10, column: 9, scope: !2) + +... +--- +name: test +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +fixedStack: + - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default } +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + maxKernArgAlign: 4 + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledVGPRs: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + kernargSegmentPtr: { reg: '$sgpr6_sgpr7' } + workGroupIDX: { reg: '$sgpr8' } + privateSegmentWaveByteOffset: { reg: '$sgpr9' } +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0: + ; CHECK: DBG_VALUE $noreg, 0 + bb.0: + $vgpr2 = IMPLICIT_DEF + SI_SPILL_V32_SAVE $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) + DBG_VALUE %fixed-stack.0, 0, !1, !8, debug-location !9 + + bb.1: + renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 
(s32) from %stack.0, align 4, addrspace 5) + S_ENDPGM 0 From fd64de32129977f3bb52d874f499ed0a98214db3 Mon Sep 17 00:00:00 2001 From: Oleg Shyshkov Date: Fri, 4 Nov 2022 12:06:31 +0100 Subject: [PATCH 237/516] [mlir][linalg] Add BroadcastOp to Linalg structured ops. [[RFC] Primitive Ops: add BroadcastOp to Linalg](https://discourse.llvm.org/t/rfc-primitive-ops-add-broadcastop-to-linalg/66313?u=olegshyshkov) Differential Revision: https://reviews.llvm.org/D137331 --- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 77 +++++++- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 182 +++++++++++++++--- mlir/test/Dialect/Linalg/invalid.mlir | 78 ++++++++ .../Dialect/Linalg/one-shot-bufferize.mlir | 13 ++ mlir/test/Dialect/Linalg/roundtrip.mlir | 50 +++++ .../lower-to-loops-using-interface.mlir | 26 +++ 6 files changed, 401 insertions(+), 25 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index b067a1ddd1e61..9866620fd4892 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -440,7 +440,9 @@ def TransposeOp : LinalgStructuredBase_Op<"transpose", [ static std::function)> - getRegionBuilder(); + getRegionBuilder() { + return nullptr; + } static void createRegion(::mlir::OpBuilder &opBuilder, ::mlir::OperationState & odsState); @@ -450,6 +452,79 @@ def TransposeOp : LinalgStructuredBase_Op<"transpose", [ let hasVerifier = 1; } + +//===----------------------------------------------------------------------===// +// Broadcast op. +//===----------------------------------------------------------------------===// + +def BroadcastOp : LinalgStructuredBase_Op<"broadcast", [ + DeclareOpInterfaceMethods, + SameVariadicOperandSize, + SingleBlockImplicitTerminator<"YieldOp">]> { + let summary = "Static broadcast operator"; + let description = [{ + Broadcast the input into the given shape by adding dimensions. 
+ + Each index in `dimensions` attribute maps input dimension into the + corresponding target dimension. The length of the `dimensions` list should + match the `input` rank and dimensions should be in sorted order. There is no + ambiguity at compile-time about shape information. + + Example: + ``` + %bcast = linalg.broadcast + ins(%input:tensor<16xf32>) + outs(%init:tensor<16x64xf32>) + dimensions = [0] + ``` + }]; + + let arguments = (ins + // Input arg + TensorOrMemref:$input, + // Output arg + TensorOrMemref:$init, + + DenseI64ArrayAttr:$dimensions + ); + let results = (outs Variadic:$result); + let regions = (region SizedRegion<1>:$region); + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$init, + "DenseI64ArrayAttr":$dimensions, CArg<"ArrayRef", + "{}">:$attributes)>, + OpBuilder<(ins "Value":$input, "Value":$init, + "ArrayRef":$dimensions, CArg<"ArrayRef", + "{}">:$attributes)>, + ]; + + let extraClassDeclaration = structuredOpsBaseDecls # [{ + // Declare functions necessary for LinalgStructuredInterface. + SmallVector getIteratorTypesArray(); + ArrayAttr getIndexingMaps(); + std::string getLibraryCallName() { + return "op_has_no_registered_library_name"; + } + + // Implement functions necessary for DestinationStyleOpInterface. + std::pair getDpsInitsPositionRange() { + int64_t getNumOperands = this->getNumOperands(); + return {getNumOperands - 1, getNumOperands}; + } + + static std::function)> + getRegionBuilder() { + return nullptr; + } + }]; + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // Named Linalg ops, implemented as a declarative configurations of generic ops. 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 568b9317ca364..6377a68bc3c5d 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -662,7 +662,7 @@ void FillOp::getCanonicalizationPatterns(RewritePatternSet &results, //===----------------------------------------------------------------------===// static void buildGenericRegion( - OpBuilder &builder, OperationState &result, ValueRange inputs, + OpBuilder &builder, Location loc, Region ®ion, ValueRange inputs, ValueRange outputs, function_ref bodyBuild) { SmallVector blockArgTypes; @@ -675,10 +675,9 @@ static void buildGenericRegion( } OpBuilder::InsertionGuard guard(builder); - auto ®ion = *result.regions.front(); Block *bodyBlock = builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs); - bodyBuild(builder, result.location, bodyBlock->getArguments()); + bodyBuild(builder, loc, bodyBlock->getArguments()); } void GenericOp::getAsmBlockArgumentNames(Region ®ion, @@ -699,7 +698,8 @@ void GenericOp::build( iteratorTypes, doc, libraryCall); result.addAttributes(attributes); if (bodyBuild) - buildGenericRegion(builder, result, inputs, outputs, bodyBuild); + buildGenericRegion(builder, result.location, *result.regions.front(), + inputs, outputs, bodyBuild); } void GenericOp::build( @@ -1346,7 +1346,8 @@ void MapOp::build( result.addTypes(initType); if (bodyBuild) - buildGenericRegion(builder, result, inputs, /*outputs=*/{}, bodyBuild); + buildGenericRegion(builder, result.location, *result.regions.front(), + inputs, /*outputs=*/{}, bodyBuild); } ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) { @@ -1471,7 +1472,8 @@ void ReduceOp::build( } if (bodyBuild) - buildGenericRegion(builder, result, inputs, inits, bodyBuild); + buildGenericRegion(builder, result.location, *result.regions.front(), + inputs, inits, 
bodyBuild); } SmallVector ReduceOp::getIteratorTypesArray() { @@ -1648,13 +1650,13 @@ LogicalResult ReduceOp::verify() { // TransposeOp //===----------------------------------------------------------------------===// -std::function)> -TransposeOp::getRegionBuilder() { - return [](mlir::ImplicitLocOpBuilder &b, mlir::Block &block, - mlir::ArrayRef) { - b.create(block.getArguments().front()); - }; +static void buildIdentityRegion(OpBuilder &builder, Location loc, + Region ®ion, ValueRange inputs, + ValueRange outputs) { + buildGenericRegion(builder, loc, region, inputs, outputs, + [](OpBuilder &b, Location loc, ValueRange args) { + b.create(loc, args[0]); + }); } void TransposeOp::build(::mlir::OpBuilder &builder, @@ -1671,11 +1673,8 @@ void TransposeOp::build(::mlir::OpBuilder &builder, if (initType.isa()) result.addTypes(initType); - (void)result.addRegion(); - buildGenericRegion(builder, result, input, init, - [&](OpBuilder &b, Location loc, ValueRange args) { - b.create(loc, args[0]); - }); + buildIdentityRegion(builder, result.location, *result.addRegion(), input, + init); } void TransposeOp::build(::mlir::OpBuilder &builder, @@ -1693,13 +1692,10 @@ ParseResult TransposeOp::parse(OpAsmParser &parser, OperationState &result) { }))) return failure(); - (void)result.addRegion(); OpBuilder builder(parser.getContext()); - buildGenericRegion(builder, result, /*inputs=*/result.operands, - /*outputs=*/{}, - [&](OpBuilder &b, Location loc, ValueRange args) { - b.create(loc, args[0]); - }); + buildIdentityRegion(builder, result.location, *result.addRegion(), + /*inputs=*/result.operands, + /*outputs=*/{}); return success(); } @@ -1778,6 +1774,144 @@ void TransposeOp::getEffects( getDpsInputOperands(), getDpsInitOperands()); } +//===----------------------------------------------------------------------===// +// BroadcastOp +//===----------------------------------------------------------------------===// + +void BroadcastOp::build(::mlir::OpBuilder &builder, + 
::mlir::OperationState &result, Value input, Value init, + DenseI64ArrayAttr dimensions, + ArrayRef attributes) { + result.addOperands(input); + result.addOperands(init); + result.addAttribute(getDimensionsAttrName(result.name), dimensions); + result.addAttributes(attributes); + + // Add output types for `RankedTensorType` output arguments. + Type initType = init.getType(); + if (initType.isa()) + result.addTypes(initType); + + buildIdentityRegion(builder, result.location, *result.addRegion(), input, + init); +} + +void BroadcastOp::build(::mlir::OpBuilder &builder, + ::mlir::OperationState &result, Value input, Value init, + ArrayRef dimensions, + ArrayRef attributes) { + build(builder, result, input, init, builder.getDenseI64ArrayAttr(dimensions), + attributes); +} + +ParseResult BroadcastOp::parse(OpAsmParser &parser, OperationState &result) { + if (failed(parseDstStyleOp( + parser, result, [&](OpAsmParser &parser, NamedAttrList &attributes) { + return parseDenseI64ArrayAttr(parser, attributes, "dimensions"); + }))) + return failure(); + + OpBuilder builder(parser.getContext()); + buildIdentityRegion(builder, result.location, *result.addRegion(), + /*inputs=*/result.operands, + /*outputs=*/{}); + return success(); +} + +void BroadcastOp::getAsmResultNames( + function_ref setNameFn) { + if (!getResults().empty()) + setNameFn(getResults().front(), "broadcasted"); +} + +void BroadcastOp::print(OpAsmPrinter &p) { + p.increaseIndent(); + printCommonStructuredOpPartsWithNewLine( + p, SmallVector(getDpsInputOperands()), + SmallVector(getDpsInitOperands())); + p.printNewline(); + + printDenseI64ArrayAttr(p, getDimensionsAttrName(), getDimensions()); + p.printOptionalAttrDict((*this)->getAttrs(), {getDimensionsAttrName()}); + p.decreaseIndent(); +} + +LogicalResult BroadcastOp::verify() { + ArrayRef dimensionsRef = getDimensions(); + + if (!llvm::is_sorted(dimensionsRef)) + return emitOpError() << "dimensions should be in sorted order, implicit " + "transpose is not 
supported"; + + auto inputType = getInput().getType(); + auto initType = getInit().getType(); + + int64_t inputRank = inputType.getRank(); + int64_t initRank = initType.getRank(); + + auto inputShape = inputType.getShape(); + auto initShape = initType.getShape(); + + if (inputRank != dimensionsRef.size()) + return emitOpError() + << "input rank does match the number of dimensions. expected: " + << inputRank << ", got: " << dimensionsRef.size(); + + // Mapping from init dims to input dims. + const int64_t kUnmappedDim = -1; + SmallVector reverseDimMap(initRank, kUnmappedDim); + + for (const auto &[idx, dim] : llvm::enumerate(dimensionsRef)) { + if (dim < 0 || dim >= initRank) + return emitOpError() << "dimension " << idx + << " is out of range. expected range: [0, " + << initRank - 1 << "], got: " << dim; + + reverseDimMap[dim] = idx; + } + + for (const auto &[idx, inputDimIdx] : llvm::enumerate(reverseDimMap)) { + if (inputDimIdx == kUnmappedDim) { + // This dimensions is being added. Should be statically known. + if (ShapedType::isDynamic(initShape[idx])) + return emitOpError() + << "init dim " << idx + << " can't be dynamic, because it's not matched to input"; + } else { + // This dimensions is mapped from the input. Init and input dims should + // match. + if (inputShape[inputDimIdx] != initShape[idx]) + return emitOpError() + << "input dim " << inputDimIdx << " should match init dim " + << idx << ". 
input: " << inputShape[inputDimIdx] + << ", init: " << initShape[idx]; + } + } + + return success(); +} + +SmallVector BroadcastOp::getIteratorTypesArray() { + int64_t rank = getInit().getType().getRank(); + return SmallVector(rank, getParallelIteratorTypeName()); +} + +ArrayAttr BroadcastOp::getIndexingMaps() { + Builder builder(getContext()); + int64_t rank = getInit().getType().getRank(); + return builder.getAffineMapArrayAttr( + {builder.getMultiDimIdentityMap(rank).getSubMap( + llvm::to_vector_of(getDimensions())), + builder.getMultiDimIdentityMap(rank)}); +} + +void BroadcastOp::getEffects( + SmallVectorImpl> + &effects) { + getGenericEffectsImpl(effects, getOperation()->getResults(), + getDpsInputOperands(), getDpsInitOperands()); +} + //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 9200c6117a493..5a1c2afdebbdd 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -673,3 +673,81 @@ func.func @transpose_input_init_rank_mismatch(%input: tensor<16x32xf32>, permutation = [1, 0, 2] func.return %transpose : tensor<32x64x16xf32> } + +// ----- + +func.func @broadcast_unsorted_dims( + %input: tensor<4x16xf32>, %init: tensor<4x8x16xf32>) + -> tensor<4x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op dimensions should be in sorted order}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x8x16xf32>) + dimensions = [1, 0] + func.return %bcast : tensor<4x8x16xf32> +} + +// ----- + +func.func @broadcast_input_dims_rank_mismatch( + %input: tensor<4x16xf32>, %init: tensor<4x8x16xf32>) + -> tensor<4x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op input rank does match the number of dimensions. 
expected: 2, got: 1}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x8x16xf32>) + dimensions = [0] + func.return %bcast : tensor<4x8x16xf32> +} + +// ----- + +func.func @broadcast_unsorted_dims( + %input: tensor<4x16xf32>, %init: tensor<4x8x16xf32>) + -> tensor<4x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op dimension 1 is out of range. expected range: [0, 2], got: 5}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x8x16xf32>) + dimensions = [0, 5] + func.return %bcast : tensor<4x8x16xf32> +} + +// ----- + +func.func @broadcast_mapped_dim_mismatch( + %input: tensor<4x16xf32>, %init: tensor<5x8x16xf32>) + -> tensor<5x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 4, init: 5}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<5x8x16xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<5x8x16xf32> +} + +// ----- + +func.func @broadcast_added_dynamic_mismatch( + %input: tensor<4x16xf32>, %init: tensor<4x?x16xf32>) + -> tensor<4x?x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op init dim 1 can't be dynamic, because it's not matched to input}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x?x16xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<4x?x16xf32> +} + +// ----- + +func.func @broadcast_size_1_extension_not_supported( + %input: tensor<1x16xf32>, %init: tensor<4x?x16xf32>) + -> tensor<4x?x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. 
input: 1, init: 4}} + %bcast = linalg.broadcast + ins(%input:tensor<1x16xf32>) + outs(%init:tensor<4x?x16xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<4x?x16xf32> +} diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 58dec2be2373a..9d100d5117fdd 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -388,6 +388,19 @@ func.func @transpose(%input: tensor<16x32x64xf32>, // ----- +// CHECK-LABEL: func @broadcast +// CHECK-SAME: %[[ARG0:.*]]: memref<8x32xf32 +func.func @broadcast(%input: tensor<8x32xf32>, + %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> { + %bcast = linalg.broadcast + ins(%input:tensor<8x32xf32>) + outs(%init:tensor<8x16x32xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<8x16x32xf32> +} + +// ----- + //===----------------------------------------------------------------------===// // AllocTensorOp elimination would produce SSA violations for the example below. 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index fc0e3e057d9a8..64c2bea1f7ee1 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -517,3 +517,53 @@ func.func @transpose_memref(%input: memref<16x32x64xf32>, func.return } // CHECK-LABEL: func @transpose_memref + +// ----- + +func.func @broadcast_static_sizes(%input: tensor<8x32xf32>, + %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> { + %bcast = linalg.broadcast + ins(%input:tensor<8x32xf32>) + outs(%init:tensor<8x16x32xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<8x16x32xf32> +} +// CHECK-LABEL: func @broadcast_static_sizes +// CHECK: linalg.broadcast +// CHECK-NEXT: ins +// CHECK-NEXT: outs +// CHECK-NEXT: dimensions + +// ----- + +func.func @broadcast_with_dynamic_sizes( + %input: tensor<8x?xf32>, %init: tensor<8x16x?xf32>) + -> tensor<8x16x?xf32> { + %bcast = linalg.broadcast + ins(%input:tensor<8x?xf32>) + outs(%init:tensor<8x16x?xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<8x16x?xf32> +} +// CHECK-LABEL: func @broadcast_with_dynamic_sizes +// CHECK: linalg.broadcast +// CHECK-NEXT: ins +// CHECK-NEXT: outs +// CHECK-NEXT: dimensions + +// ----- + +func.func @broadcast_memref(%input: memref<8x32xf32>, + %init: memref<8x16x32xf32>) { + linalg.broadcast + ins(%input:memref<8x32xf32>) + outs(%init:memref<8x16x32xf32>) + dimensions = [0, 2] + func.return +} + +// CHECK-LABEL: func @broadcast_memref +// CHECK: linalg.broadcast +// CHECK-NEXT: ins +// CHECK-NEXT: outs +// CHECK-NEXT: dimensions diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir index 9addbcc83517c..b2e3fd5eec3b1 100644 --- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir +++ 
b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir @@ -240,3 +240,29 @@ func.func @reduce(%arg0: memref<16x32x64xf32>, // CHECK: %[[OUT_ELEM:.*]] = memref.load %[[OUT]][%[[I]], %[[K]]] // CHECK: %[[ADD:.*]] = arith.addf %[[IN_ELEM]], %[[OUT_ELEM]] // CHECK: memref.store %[[ADD]], %[[OUT]][%[[I]], %[[K]]] + +// ----- + +func.func @broadcast(%input: memref<8x32xf32>, + %init: memref<8x16x32xf32>) { + linalg.broadcast + ins(%input:memref<8x32xf32>) + outs(%init:memref<8x16x32xf32>) + dimensions = [0, 2] + func.return +} +// CHECK-LABEL: func.func @broadcast( +// CHECK-SAME: %[[IN:[a-zA-Z0-9]+]]: memref<8x32xf32>, +// CHECK-SAME: %[[OUT:[a-zA-Z0-9]+]]: memref<8x16x32xf32> + +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index + +// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C8]] step %[[C1]] { +// CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C16]] step %[[C1]] { +// CHECK: scf.for %[[K:.*]] = %[[C0]] to %[[C32]] step %[[C1]] { +// CHECK: %[[ELEM:.*]] = memref.load %[[IN]][%[[I]], %[[K]]] +// CHECK: memref.store %[[ELEM]], %[[OUT]][%[[I]], %[[J]], %[[K]]] From c14df228ff3ca73e3c5c00c495216bba56665fd5 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Fri, 4 Nov 2022 18:30:17 +0700 Subject: [PATCH 238/516] [Driver] Do not run test on AIX Differential Revision: https://reviews.llvm.org/D136090 --- clang/test/Driver/response-file-errs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/test/Driver/response-file-errs.c b/clang/test/Driver/response-file-errs.c index c0e02a984b9af..64eb3208a836c 100644 --- a/clang/test/Driver/response-file-errs.c +++ b/clang/test/Driver/response-file-errs.c @@ -1,3 +1,6 @@ +// AIX reacts on opening directory differently than other systems. 
+// UNSUPPORTED: aix + // If response file does not exist, '@file; directive remains unexpanded in // command line. // From 7292d051a94032ea9ea56471ad40cc81696cca5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Fri, 4 Nov 2022 09:19:22 +0100 Subject: [PATCH 239/516] [llvm-debuginfo-analyzer] Fix format string-type mismatch in LVScope Fix mismatch between `%d`/`%x` format strings and `uint64_t` type. This fixes incorrect printing of "Scope Sizes" on 32-bit platforms where this leads to `llvm::print()` misreading vararg. Fixes #58758 Differential Revision: https://reviews.llvm.org/D137400 --- llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp | 5 +++-- .../DWARF/06-dwarf-full-logical-view.test | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp index dd5530e07330a..f012bb471be2e 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp @@ -1192,7 +1192,8 @@ void LVScopeCompileUnit::addSize(LVScope *Scope, LVOffset Lower, LVOffset Upper) { LLVM_DEBUG({ dbgs() << format( - "CU [0x%08x], Scope [0x%08x], Range [0x%08x:0x%08x], Size = %d\n", + "CU [0x%08" PRIx64 "], Scope [0x%08" PRIx64 "], Range [0x%08" PRIx64 + ":0x%08" PRIx64 "], Size = %" PRId64 "\n", getOffset(), Scope->getOffset(), Lower, Upper, Upper - Lower); }); @@ -1548,7 +1549,7 @@ void LVScopeCompileUnit::printScopeSize(const LVScope *Scope, raw_ostream &OS) { // implementation-defined rounding inside printing functions. float Percentage = rint((float(Size) / CUContributionSize) * 100.0 * 100.0) / 100.0; - OS << format("%10d (%6.2f%%) : ", Size, Percentage); + OS << format("%10" PRId64 " (%6.2f%%) : ", Size, Percentage); Scope->print(OS); // Keep record of the total sizes at each lexical level. 
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test b/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test index 742b2e0c3b11e..e83592afd2ffa 100644 --- a/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test +++ b/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test @@ -1,8 +1,5 @@ ; REQUIRES: x86-registered-target -; FIXME: Test failure https://reviews.llvm.org/D125783 -; UNSUPPORTED: arm - ; Test case 6 - Full logical view ; test.cpp From 1fb186198af5f183dde053c1396f899567755d64 Mon Sep 17 00:00:00 2001 From: Alexey Moksyakov Date: Mon, 27 Jun 2022 12:37:53 +0000 Subject: [PATCH 240/516] adds huge pages support of PIE/no-PIE binaries This patch adds huge pages support (-hugify) for PIE/no-PIE binaries. It also restores the functionality needed to support kernels < 5.10, where the dynamic loader has a problem with the alignment of page addresses. Differential Revision: https://reviews.llvm.org/D129107 --- bolt/include/bolt/Passes/Hugify.h | 29 +++ .../bolt/RuntimeLibs/HugifyRuntimeLibrary.h | 6 +- bolt/include/bolt/Utils/CommandLineOpts.h | 1 + bolt/lib/Passes/CMakeLists.txt | 1 + bolt/lib/Passes/Hugify.cpp | 50 +++++ bolt/lib/Rewrite/BinaryPassManager.cpp | 3 + bolt/lib/Rewrite/RewriteInstance.cpp | 11 + bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp | 29 --- bolt/runtime/CMakeLists.txt | 5 +- bolt/runtime/common.h | 74 +++++++ bolt/runtime/hugify.cpp | 190 +++++++++++------- bolt/test/runtime/X86/hugify.c | 27 +++ 12 files changed, 321 insertions(+), 105 deletions(-) create mode 100644 bolt/include/bolt/Passes/Hugify.h create mode 100644 bolt/lib/Passes/Hugify.cpp create mode 100644 bolt/test/runtime/X86/hugify.c diff --git a/bolt/include/bolt/Passes/Hugify.h b/bolt/include/bolt/Passes/Hugify.h new file mode 100644 index 0000000000000..0a7734059121c --- /dev/null +++ b/bolt/include/bolt/Passes/Hugify.h @@ -0,0 +1,29 @@ +//===- bolt/Passes/Hugify.h 
-------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_HUGIFY_H +#define BOLT_PASSES_HUGIFY_H + +#include "bolt/Passes/BinaryPasses.h" + +namespace llvm { +namespace bolt { + +class HugePage : public BinaryFunctionPass { +public: + HugePage(const cl::opt &PrintPass) : BinaryFunctionPass(PrintPass) {} + + void runOnFunctions(BinaryContext &BC) override; + + const char *getName() const override { return "HugePage"; } +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h b/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h index 4889708b13a3b..e9357d1c36153 100644 --- a/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h +++ b/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h @@ -22,13 +22,11 @@ class HugifyRuntimeLibrary : public RuntimeLibrary { public: /// Add custom section names generated by the runtime libraries to \p /// SecNames. 
- void addRuntimeLibSections(std::vector &SecNames) const final { - SecNames.push_back(".bolt.hugify.entries"); - } + void addRuntimeLibSections(std::vector &SecNames) const final {} void adjustCommandLineOptions(const BinaryContext &BC) const final; - void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final; + void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final {} void link(BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld, std::function OnLoad) final; diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index b7cca813bdec7..7b654f19f6d45 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -44,6 +44,7 @@ extern llvm::cl::opt HeatmapMinAddress; extern llvm::cl::opt HotData; extern llvm::cl::opt HotFunctionsAtEnd; extern llvm::cl::opt HotText; +extern llvm::cl::opt Hugify; extern llvm::cl::opt Instrument; extern llvm::cl::opt OutputFilename; extern llvm::cl::opt PerfData; diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index 7f67261a4e04b..6a0638ba3aa74 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_library(LLVMBOLTPasses FrameOptimizer.cpp HFSort.cpp HFSortPlus.cpp + Hugify.cpp IdenticalCodeFolding.cpp IndirectCallPromotion.cpp Inliner.cpp diff --git a/bolt/lib/Passes/Hugify.cpp b/bolt/lib/Passes/Hugify.cpp new file mode 100644 index 0000000000000..170fb5bda349f --- /dev/null +++ b/bolt/lib/Passes/Hugify.cpp @@ -0,0 +1,50 @@ +//===--- bolt/Passes/Hugify.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/Hugify.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "bolt-hugify" + +using namespace llvm; + +namespace llvm { +namespace bolt { + +void HugePage::runOnFunctions(BinaryContext &BC) { + auto *RtLibrary = BC.getRuntimeLibrary(); + if (!RtLibrary || !BC.isELF() || !BC.StartFunctionAddress) { + return; + } + + auto createSimpleFunction = + [&](std::string Title, std::vector Instrs) -> BinaryFunction * { + BinaryFunction *Func = BC.createInjectedBinaryFunction(Title); + + std::vector> BBs; + BBs.emplace_back(Func->createBasicBlock(nullptr)); + BBs.back()->addInstructions(Instrs.begin(), Instrs.end()); + BBs.back()->setCFIState(0); + BBs.back()->setOffset(BinaryBasicBlock::INVALID_OFFSET); + + Func->insertBasicBlocks(nullptr, std::move(BBs), + /*UpdateLayout=*/true, + /*UpdateCFIState=*/false); + Func->updateState(BinaryFunction::State::CFG_Finalized); + return Func; + }; + + const BinaryFunction *const Start = + BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress); + assert(Start && "Entry point function not found"); + const MCSymbol *StartSym = Start->getSymbol(); + createSimpleFunction("__bolt_hugify_start_program", + BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get())); +} +} // namespace bolt +} // namespace llvm \ No newline at end of file diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 85b49a29c8079..0b17dd54ece58 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -13,6 +13,7 @@ #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" #include "bolt/Passes/FrameOptimizer.h" +#include "bolt/Passes/Hugify.h" #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/Inliner.h" @@ -333,6 +334,8 @@ void 
BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { if (opts::Instrument) Manager.registerPass(std::make_unique(NeverPrint)); + else if (opts::Hugify) + Manager.registerPass(std::make_unique(NeverPrint)); Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 6ea4ba603698c..b0403da636011 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -479,6 +479,11 @@ Error RewriteInstance::discoverStorage() { NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); + // Hugify: Additional huge page from left side due to + // weird ASLR mapping addresses (4KB aligned) + if (opts::Hugify && !BC->HasFixedLoadAddress) + NextAvailableAddress += BC->PageAlign; + if (!opts::UseGnuStack) { // This is where the black magic happens. Creating PHDR table in a segment // other than that containing ELF header is tricky. Some loaders and/or @@ -3719,6 +3724,12 @@ void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { Address = alignTo(Address, Section->getAlignment()); Section->setOutputAddress(Address); Address += Section->getOutputSize(); + + // Hugify: Additional huge page from right side due to + // weird ASLR mapping addresses (4KB aligned) + if (opts::Hugify && !BC->HasFixedLoadAddress && + Section->getName() == BC->getMainCodeSectionName()) + Address = alignTo(Address, Section->getAlignment()); } // Make sure we allocate enough space for huge pages. 
diff --git a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp index 9a4a1f7239e54..802bb0d1fe914 100644 --- a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp @@ -60,35 +60,6 @@ void HugifyRuntimeLibrary::adjustCommandLineOptions( } } -void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) { - const BinaryFunction *StartFunction = - BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress)); - assert(!StartFunction->isFragment() && "expected main function fragment"); - if (!StartFunction) { - errs() << "BOLT-ERROR: failed to locate function at binary start address\n"; - exit(1); - } - - const auto Flags = BinarySection::getFlags(/*IsReadOnly=*/false, - /*IsText=*/false, - /*IsAllocatable=*/true); - MCSectionELF *Section = - BC.Ctx->getELFSection(".bolt.hugify.entries", ELF::SHT_PROGBITS, Flags); - - // __bolt_hugify_init_ptr stores the poiter the hugify library needs to - // jump to after finishing the init code. 
- MCSymbol *InitPtr = BC.Ctx->getOrCreateSymbol("__bolt_hugify_init_ptr"); - - Section->setAlignment(llvm::Align(BC.RegularPageSize)); - Streamer.switchSection(Section); - - Streamer.emitLabel(InitPtr); - Streamer.emitSymbolAttribute(InitPtr, MCSymbolAttr::MCSA_Global); - Streamer.emitValue( - MCSymbolRefExpr::create(StartFunction->getSymbol(), *(BC.Ctx)), - /*Size=*/8); -} - void HugifyRuntimeLibrary::link(BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld, std::function OnLoad) { diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt index f3cea8d2dc08c..c685819aec821 100644 --- a/bolt/runtime/CMakeLists.txt +++ b/bolt/runtime/CMakeLists.txt @@ -27,10 +27,11 @@ set(BOLT_RT_FLAGS -fno-exceptions -fno-rtti -fno-stack-protector - -mno-sse) + -mno-sse + -fPIE) # Don't let the compiler think it can create calls to standard libs -target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE) +target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h index 008dbb6c3de86..58b8114713d69 100644 --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -283,6 +283,22 @@ uint32_t strLen(const char *Str) { return Size; } +void *strStr(const char *const Haystack, const char *const Needle) { + int j = 0; + + for (int i = 0; i < strLen(Haystack); i++) { + if (Haystack[i] == Needle[0]) { + for (j = 1; j < strLen(Needle); j++) { + if (Haystack[i + j] != Needle[j]) + break; + } + if (j == strLen(Needle)) + return (void *)&Haystack[i]; + } + } + return nullptr; +} + void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) { char Buf[BufSize]; char *Ptr = Buf; @@ -310,6 +326,25 @@ unsigned long hexToLong(const char *Str, char Terminator = '\0') { return Res; } +/// 
Starting from character at \p buf, find the longest consecutive sequence +/// of digits (0-9) and convert it to uint32_t. The converted value +/// is put into \p ret. \p end marks the end of the buffer to avoid buffer +/// overflow. The function \returns whether a valid uint32_t value is found. +/// \p buf will be updated to the next character right after the digits. +static bool scanUInt32(const char *&Buf, const char *End, uint32_t &Ret) { + uint64_t Result = 0; + const char *OldBuf = Buf; + while (Buf < End && ((*Buf) >= '0' && (*Buf) <= '9')) { + Result = Result * 10 + (*Buf) - '0'; + ++Buf; + } + if (OldBuf != Buf && Result <= 0xFFFFFFFFu) { + Ret = static_cast(Result); + return true; + } + return false; +} + #if !defined(__APPLE__) // We use a stack-allocated buffer for string manipulation in many pieces of // this code, including the code that prints each line of the fdata file. This @@ -387,6 +422,28 @@ int __madvise(void *addr, size_t length, int advice) { return ret; } +#define _UTSNAME_LENGTH 65 + +struct UtsNameTy { + char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */ + char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined + network" */ + char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */ + char version[_UTSNAME_LENGTH]; /* Operating system version */ + char machine[_UTSNAME_LENGTH]; /* Hardware identifier */ + char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */ +}; + +int __uname(struct UtsNameTy *Buf) { + int Ret; + __asm__ __volatile__("movq $63, %%rax\n" + "syscall\n" + : "=a"(Ret) + : "D"(Buf) + : "cc", "rcx", "r11", "memory"); + return Ret; +} + struct timespec { uint64_t tv_sec; /* seconds */ uint64_t tv_nsec; /* nanoseconds */ @@ -482,6 +539,23 @@ int __fsync(int fd) { return ret; } +// %rdi %rsi %rdx %r10 %r8 +// sys_prctl int option unsigned unsigned unsigned unsigned +// long arg2 long arg3 long arg4 long arg5 +int __prctl(int Option, unsigned long Arg2, 
unsigned long Arg3, + unsigned long Arg4, unsigned long Arg5) { + int Ret; + register long rdx asm("rdx") = Arg3; + register long r8 asm("r8") = Arg5; + register long r10 asm("r10") = Arg4; + __asm__ __volatile__("movq $157, %%rax\n" + "syscall\n" + : "=a"(Ret) + : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8) + :); + return Ret; +} + #endif void reportError(const char *Msg, uint64_t Size) { diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index 69e1a7e0694a8..d8a2d8b45f6cb 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -1,129 +1,179 @@ -//===- bolt/runtime/hugify.cpp --------------------------------------------===// +//===- bolt/runtime/hugify.cpp -------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// #if defined (__x86_64__) #if !defined(__APPLE__) #include "common.h" -#include + +#pragma GCC visibility push(hidden) // Enables a very verbose logging to stderr useful when debugging -//#define ENABLE_DEBUG +// #define ENABLE_DEBUG + +#ifdef ENABLE_DEBUG +#define DEBUG(X) \ + { X; } +#else +#define DEBUG(X) \ + {} +#endif -// Function pointers to init routines in the binary, so we can resume -// regular execution of the function that we hooked. -extern void (*__bolt_hugify_init_ptr)(); +// Function contains trampoline to _start, +// so we can resume regular execution of the function that we hooked. +extern void __bolt_hugify_start_program(); // The __hot_start and __hot_end symbols set by Bolt. We use them to figure // out the rage for marking huge pages. 
extern uint64_t __hot_start; extern uint64_t __hot_end; -#ifdef MADV_HUGEPAGE +static void getKernelVersion(uint32_t *Val) { + // release should be in the format: %d.%d.%d + // major, minor, release + struct UtsNameTy UtsName; + int Ret = __uname(&UtsName); + const char *Buf = UtsName.release; + const char *End = Buf + strLen(Buf); + const char Delims[2][2] = {".", "."}; + + for (int i = 0; i < 3; ++i) { + if (!scanUInt32(Buf, End, Val[i])) { + return; + } + if (i < sizeof(Delims) / sizeof(Delims[0])) { + const char *Ptr = Delims[i]; + while (*Ptr != '\0') { + if (*Ptr != *Buf) { + return; + } + ++Ptr; + ++Buf; + } + } + } +} + /// Check whether the kernel supports THP via corresponding sysfs entry. -static bool has_pagecache_thp_support() { - char buf[256] = {0}; - const char *madviseStr = "always [madvise] never"; +/// thp works only starting from 5.10 +static bool hasPagecacheTHPSupport() { + char Buf[64]; - int fd = __open("/sys/kernel/mm/transparent_hugepage/enabled", + int FD = __open("/sys/kernel/mm/transparent_hugepage/enabled", 0 /* O_RDONLY */, 0); - if (fd < 0) + if (FD < 0) + return false; + + memset(Buf, 0, sizeof(Buf)); + const size_t Res = __read(FD, Buf, sizeof(Buf)); + if (Res < 0) return false; - size_t res = __read(fd, buf, 256); - if (res < 0) + if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) return false; - int cmp = strnCmp(buf, madviseStr, strLen(madviseStr)); - return cmp == 0; + struct KernelVersionTy { + uint32_t major; + uint32_t minor; + uint32_t release; + }; + + KernelVersionTy KernelVersion; + + getKernelVersion((uint32_t *)&KernelVersion); + if (KernelVersion.major >= 5 && KernelVersion.minor >= 10) + return true; + + return false; } -static void hugify_for_old_kernel(uint8_t *from, uint8_t *to) { - size_t size = to - from; +static void hugifyForOldKernel(uint8_t *From, uint8_t *To) { + const size_t Size = To - From; - uint8_t *mem = reinterpret_cast( - __mmap(0, size, 0x3 /* PROT_READ | PROT_WRITE*/, - 0x22 /* MAP_PRIVATE 
| MAP_ANONYMOUS*/, -1, 0)); + uint8_t *Mem = reinterpret_cast( + __mmap(0, Size, 0x3 /* PROT_READ | PROT_WRITE */, + 0x22 /* MAP_PRIVATE | MAP_ANONYMOUS */, -1, 0)); - if (mem == (void *)MAP_FAILED) { - char msg[] = "Could not allocate memory for text move\n"; - reportError(msg, sizeof(msg)); + if (Mem == ((void *)-1) /* MAP_FAILED */) { + char Msg[] = "[hugify] could not allocate memory for text move\n"; + reportError(Msg, sizeof(Msg)); } -#ifdef ENABLE_DEBUG - reportNumber("Allocated temporary space: ", (uint64_t)mem, 16); -#endif - // Copy the hot code to a temproary location. - memcpy(mem, from, size); + DEBUG(reportNumber("[hugify] allocated temporary address: ", (uint64_t)Mem, + 16);) + DEBUG(reportNumber("[hugify] allocated size: ", (uint64_t)Size, 16);) + + // Copy the hot code to a temporary location. + memcpy(Mem, From, Size); + __prctl(41 /* PR_SET_THP_DISABLE */, 0, 0, 0, 0); // Maps out the existing hot code. - if (__mmap(reinterpret_cast(from), size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, - 0) == (void *)MAP_FAILED) { - char msg[] = "failed to mmap memory for large page move terminating\n"; - reportError(msg, sizeof(msg)); + if (__mmap(reinterpret_cast(From), Size, + 0x3 /* PROT_READ | PROT_WRITE */, + 0x32 /* MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE */, -1, + 0) == ((void *)-1) /*MAP_FAILED*/) { + char Msg[] = + "[hugify] failed to mmap memory for large page move terminating\n"; + reportError(Msg, sizeof(Msg)); } // Mark the hot code page to be huge page. - if (__madvise(from, size, MADV_HUGEPAGE) == -1) { - char msg[] = "failed to allocate large page\n"; - reportError(msg, sizeof(msg)); + if (__madvise(From, Size, 14 /* MADV_HUGEPAGE */) == -1) { + char Msg[] = "[hugify] setting MADV_HUGEPAGE is failed\n"; + reportError(Msg, sizeof(Msg)); } // Copy the hot code back. 
- memcpy(from, mem, size); + memcpy(From, Mem, Size); // Change permission back to read-only, ignore failure - __mprotect(from, size, PROT_READ | PROT_EXEC); + __mprotect(From, Size, 0x5 /* PROT_READ | PROT_EXEC */); - __munmap(mem, size); + __munmap(Mem, Size); } #endif extern "C" void __bolt_hugify_self_impl() { -#ifdef MADV_HUGEPAGE - uint8_t *hotStart = (uint8_t *)&__hot_start; - uint8_t *hotEnd = (uint8_t *)&__hot_end; + uint8_t *HotStart = (uint8_t *)&__hot_start; + uint8_t *HotEnd = (uint8_t *)&__hot_end; // Make sure the start and end are aligned with huge page address - const size_t hugePageBytes = 2L * 1024 * 1024; - uint8_t *from = hotStart - ((intptr_t)hotStart & (hugePageBytes - 1)); - uint8_t *to = hotEnd + (hugePageBytes - 1); - to -= (intptr_t)to & (hugePageBytes - 1); - -#ifdef ENABLE_DEBUG - reportNumber("[hugify] hot start: ", (uint64_t)hotStart, 16); - reportNumber("[hugify] hot end: ", (uint64_t)hotEnd, 16); - reportNumber("[hugify] aligned huge page from: ", (uint64_t)from, 16); - reportNumber("[hugify] aligned huge page to: ", (uint64_t)to, 16); -#endif - - if (!has_pagecache_thp_support()) { - hugify_for_old_kernel(from, to); + const size_t HugePageBytes = 2L * 1024 * 1024; + uint8_t *From = HotStart - ((intptr_t)HotStart & (HugePageBytes - 1)); + uint8_t *To = HotEnd + (HugePageBytes - 1); + To -= (intptr_t)To & (HugePageBytes - 1); + + DEBUG(reportNumber("[hugify] hot start: ", (uint64_t)HotStart, 16);) + DEBUG(reportNumber("[hugify] hot end: ", (uint64_t)HotEnd, 16);) + DEBUG(reportNumber("[hugify] aligned huge page from: ", (uint64_t)From, 16);) + DEBUG(reportNumber("[hugify] aligned huge page to: ", (uint64_t)To, 16);) + + if (!hasPagecacheTHPSupport()) { + DEBUG(report( + "[hugify] workaround with memory alignment for kernel < 5.10\n");) + hugifyForOldKernel(From, To); return; } - if (__madvise(from, (to - from), MADV_HUGEPAGE) == -1) { - char msg[] = "failed to allocate large page\n"; + if (__madvise(From, (To - From), 14 /* 
MADV_HUGEPAGE */) == -1) { + char Msg[] = "[hugify] failed to allocate large page\n"; // TODO: allow user to control the failure behavior. - reportError(msg, sizeof(msg)); + reportError(Msg, sizeof(Msg)); } -#endif } /// This is hooking ELF's entry, it needs to save all machine state. extern "C" __attribute((naked)) void __bolt_hugify_self() { - __asm__ __volatile__(SAVE_ALL - "call __bolt_hugify_self_impl\n" - RESTORE_ALL - "jmp *__bolt_hugify_init_ptr(%%rip)\n" - :::); -} - +#if defined(__x86_64__) + __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL + "jmp __bolt_hugify_start_program\n" :: + :); +#else + exit(1); #endif +} #endif diff --git a/bolt/test/runtime/X86/hugify.c b/bolt/test/runtime/X86/hugify.c new file mode 100644 index 0000000000000..cfc0cb62652b9 --- /dev/null +++ b/bolt/test/runtime/X86/hugify.c @@ -0,0 +1,27 @@ +// Make sure BOLT correctly processes --hugify option + +#include + +int main(int argc, char **argv) { + printf("Hello world\n"); + return 0; +} + +/* +REQUIRES: system-linux,bolt-runtime + +RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q +RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q + +RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify +RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify + +RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE + +CHECK-NOPIE: Hello world + +RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE + +CHECK-PIE: Hello world + +*/ From 7d0648cb6c5f3c7377662a1211846f9fe03c474f Mon Sep 17 00:00:00 2001 From: Alex Gatea Date: Fri, 4 Nov 2022 14:28:17 +0100 Subject: [PATCH 241/516] [GVN] Patch for invalid GVN replacement If PRE is performed as part of the main GVN pass (to PRE GEP operands before processing loads), and it is performed across a backedge, we will end up adding the new instruction to the leader table of a block that has not yet been processed. 
When it will be processed, GVN will incorrectly assume that the value is already available, even though it is only available at the end of the block. Avoid this by not performing PRE across backedges. Fixes https://github.com/llvm/llvm-project/issues/58418. Differential Revision: https://reviews.llvm.org/D136095 --- llvm/lib/Transforms/Scalar/GVN.cpp | 11 +---- .../GVN/PRE/load-pre-across-backedge.ll | 45 +++++++++++++++++++ llvm/test/Transforms/GVN/PRE/pre-gep-load.ll | 7 ++- 3 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 llvm/test/Transforms/GVN/PRE/load-pre-across-backedge.ll diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index a489a890f6641..1bc5123ded321 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2828,17 +2828,10 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { NumWithout = 2; break; } - // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and - // when CurInst has operand defined in CurrentBlock (so it may be defined - // by phi in the loop header). + // It is not safe to do PRE when P->CurrentBlock is a loop backedge. 
assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) && "Invalid BlockRPONumber map."); - if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] && - llvm::any_of(CurInst->operands(), [&](const Use &U) { - if (auto *Inst = dyn_cast(U.get())) - return Inst->getParent() == CurrentBlock; - return false; - })) { + if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) { NumWithout = 2; break; } diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-across-backedge.ll b/llvm/test/Transforms/GVN/PRE/load-pre-across-backedge.ll new file mode 100644 index 0000000000000..fe7ceed54fc58 --- /dev/null +++ b/llvm/test/Transforms/GVN/PRE/load-pre-across-backedge.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S < %s | FileCheck %s + +; Check that PRE-LOAD across backedge does not +; result in invalid dominator tree. +declare void @use(i32) + +define void @test1(i1 %c, i32 %arg) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[DOTBB2_CRIT_EDGE:%.*]] +; CHECK: .bb2_crit_edge: +; CHECK-NEXT: [[DOTPRE:%.*]] = shl i32 [[ARG:%.*]], 2 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[SHL1:%.*]] = shl i32 [[ARG]], 2 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[SHL2_PRE_PHI:%.*]] = phi i32 [ [[DOTPRE]], [[DOTBB2_CRIT_EDGE]] ], [ [[SHL3:%.*]], [[BB3]] ] +; CHECK-NEXT: call void @use(i32 [[SHL2_PRE_PHI]]) +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SHL3]] = shl i32 [[ARG]], 2 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr null, i32 [[SHL3]] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: call void @use(i32 [[V]]) +; CHECK-NEXT: br label [[BB2]] +; + br i1 %c, label %bb1, label %bb2 + +bb1: + %shl1 = shl i32 %arg, 2 + br label %bb3 + +bb2: + %shl2 = shl i32 %arg, 2 + call void @use(i32 %shl2) + br label %bb3 + +bb3: + %shl3 = shl i32 %arg, 2 + %gep = getelementptr i32, ptr null, i32 %shl3 + 
%v = load i32, ptr %gep, align 4 + call void @use(i32 %v) + br label %bb2 +} diff --git a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll index ed3ac43b4beca..e40e6ffc0aa3b 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll @@ -95,14 +95,13 @@ define void @test_shortcut_safe(i1 %tst, i32 %p1, i32* %a) { ; CHECK-LABEL: @test_shortcut_safe( ; CHECK-NEXT: br i1 [[TST:%.*]], label [[SEXT1:%.*]], label [[PRE_DEST:%.*]] ; CHECK: pre.dest: -; CHECK-NEXT: [[DOTPRE:%.*]] = sext i32 [[P1:%.*]] to i64 ; CHECK-NEXT: br label [[SEXT_USE:%.*]] ; CHECK: sext1: -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[P1]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[P1:%.*]] to i64 ; CHECK-NEXT: br label [[SEXT_USE]] ; CHECK: sext.use: -; CHECK-NEXT: [[IDXPROM2_PRE_PHI:%.*]] = phi i64 [ [[IDXPROM]], [[SEXT1]] ], [ [[DOTPRE]], [[PRE_DEST]] ] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM2_PRE_PHI]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[P1]] to i64 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM2]] ; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 ; CHECK-NEXT: tail call void @g(i32 [[VAL]]) ; CHECK-NEXT: br label [[PRE_DEST]] From 96ad51e3ebafdaed345a699d752ee4d96b00d82c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20MARTINEZ=20CAAMA=C3=91O?= Date: Fri, 4 Nov 2022 13:28:26 +0000 Subject: [PATCH 242/516] [StructurizeCFG][DebugInfo] Avoid use-after-free Reviewed By: dstuttard Differential Revision: https://reviews.llvm.org/D137408 --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 2adf172f6b98a..81d151c2904e8 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ 
b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -856,7 +856,12 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); FlowSet.insert(Flow); - TermDL[Flow] = TermDL[Dominator]; + + // use a temporary variable to avoid a use-after-free if the map's storage is + // reallocated + DebugLoc DL = TermDL[Dominator]; + TermDL[Flow] = std::move(DL); + DT->addNewBlock(Flow, Dominator); ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); return Flow; From 7acfe3629479c8489fc2d7f629994dc200be990c Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Sun, 23 Oct 2022 17:32:58 +0200 Subject: [PATCH 243/516] Implement CWG2631 Implement https://cplusplus.github.io/CWG/issues/2631.html. Immediate calls in default arguments and defaults members are not evaluated. Instead, we evaluate them when constructing a `CXXDefaultArgExpr`/`BuildCXXDefaultInitExpr`. The immediate calls are executed by doing a transform on the initializing expression. Note that lambdas are not considering subexpressions so we do not need to transform them. As a result of this patch, unused default member initializers are not considered odr-used, and errors about members binding to local variables in an outer scope only surface at the point where a constructor is defined. 
Reviewed By: aaron.ballman, #clang-language-wg Differential Revision: https://reviews.llvm.org/D136554 --- clang/docs/ReleaseNotes.rst | 5 + clang/include/clang/AST/ExprCXX.h | 108 ++++++-- clang/include/clang/AST/Stmt.h | 7 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Sema/Sema.h | 87 +++++- clang/lib/AST/ASTImporter.cpp | 21 +- clang/lib/AST/Decl.cpp | 3 +- clang/lib/AST/ExprCXX.cpp | 68 ++++- clang/lib/Parse/ParseCXXInlineMethods.cpp | 5 + clang/lib/Parse/ParseDeclCXX.cpp | 6 +- clang/lib/Sema/SemaDeclCXX.cpp | 90 ++---- clang/lib/Sema/SemaExpr.cpp | 258 ++++++++++++++++-- clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 +- clang/lib/Sema/TreeTransform.h | 21 +- clang/lib/Serialization/ASTReaderStmt.cpp | 12 +- clang/lib/Serialization/ASTWriterStmt.cpp | 6 + clang/test/CXX/class/class.local/p1-0x.cpp | 4 +- .../CodeGenCXX/builtin-source-location.cpp | 2 + .../default-arguments-with-immediate.cpp | 54 ++++ .../default-argument-with-immediate-calls.cpp | 34 +++ .../cxx2a-consteval-default-params.cpp | 68 +++++ clang/test/SemaCXX/source_location.cpp | 64 ++++- 22 files changed, 789 insertions(+), 146 deletions(-) create mode 100644 clang/test/CodeGenCXX/default-arguments-with-immediate.cpp create mode 100644 clang/test/PCH/default-argument-with-immediate-calls.cpp create mode 100644 clang/test/SemaCXX/cxx2a-consteval-default-params.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7bb1405c131ab..e16191e06e337 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -578,6 +578,11 @@ C++ Language Changes in Clang conforming GNU extensions. Projects incompatible with C++17 can add ``-std=gnu++14`` to their build settings to restore the previous behaviour. - Implemented DR2358 allowing init captures in lambdas in default arguments. +- Implemented DR2631. Invalid ``consteval`` calls in default arguments and default + member initializers are diagnosed when and if the default is used. 
+ This fixes `Issue 56379 <https://github.com/llvm/llvm-project/issues/56379>`_ + and changes the value of ``std::source_location::current()`` + used in default parameters calls compared to previous versions of Clang. C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 0b927c0294752..098720d9469f0 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -1245,8 +1245,12 @@ class CXXThrowExpr : public Expr { /// This wraps up a function call argument that was created from the /// corresponding parameter's default argument, when the call did not /// explicitly supply arguments for all of the parameters. -class CXXDefaultArgExpr final : public Expr { +class CXXDefaultArgExpr final + : public Expr, + private llvm::TrailingObjects { friend class ASTStmtReader; + friend class ASTReader; + friend TrailingObjects; /// The parameter whose default is being used. ParmVarDecl *Param; @@ -1255,7 +1259,7 @@ class CXXDefaultArgExpr final : public Expr { DeclContext *UsedContext; CXXDefaultArgExpr(StmtClass SC, SourceLocation Loc, ParmVarDecl *Param, - DeclContext *UsedContext) + Expr *RewrittenExpr, DeclContext *UsedContext) : Expr(SC, Param->hasUnparsedDefaultArg() ? 
Param->getType().getNonReferenceType() @@ -1264,28 +1268,58 @@ class CXXDefaultArgExpr final : public Expr { Param->getDefaultArg()->getObjectKind()), Param(Param), UsedContext(UsedContext) { CXXDefaultArgExprBits.Loc = Loc; + CXXDefaultArgExprBits.HasRewrittenInit = RewrittenExpr != nullptr; + if (RewrittenExpr) + *getTrailingObjects() = RewrittenExpr; setDependence(computeDependence(this)); } + CXXDefaultArgExpr(EmptyShell Empty, bool HasRewrittenInit) + : Expr(CXXDefaultArgExprClass, Empty) { + CXXDefaultArgExprBits.HasRewrittenInit = HasRewrittenInit; + } + + size_t numTrailingObjects() const { + return CXXDefaultArgExprBits.HasRewrittenInit; + } + public: - CXXDefaultArgExpr(EmptyShell Empty) : Expr(CXXDefaultArgExprClass, Empty) {} + static CXXDefaultArgExpr *CreateEmpty(const ASTContext &C, + bool HasRewrittenInit); // \p Param is the parameter whose default argument is used by this // expression. static CXXDefaultArgExpr *Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, - DeclContext *UsedContext) { - return new (C) - CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, UsedContext); - } - + ParmVarDecl *Param, Expr *RewrittenExpr, + DeclContext *UsedContext); // Retrieve the parameter that the argument was created from. const ParmVarDecl *getParam() const { return Param; } ParmVarDecl *getParam() { return Param; } - // Retrieve the actual argument to the function call. - const Expr *getExpr() const { return getParam()->getDefaultArg(); } - Expr *getExpr() { return getParam()->getDefaultArg(); } + bool hasRewrittenInit() const { + return CXXDefaultArgExprBits.HasRewrittenInit; + } + + // Retrieve the argument to the function call. + Expr *getExpr(); + const Expr *getExpr() const { + return const_cast(this)->getExpr(); + } + + Expr *getRewrittenExpr() { + return hasRewrittenInit() ? 
*getTrailingObjects() : nullptr; + } + + const Expr *getRewrittenExpr() const { + return const_cast(this)->getRewrittenExpr(); + } + + // Retrieve the rewritten init expression (for an init expression containing + // immediate calls) with the top level FullExpr and ConstantExpr stripped off. + Expr *getAdjustedRewrittenExpr(); + const Expr *getAdjustedRewrittenExpr() const { + return const_cast(this)->getAdjustedRewrittenExpr(); + } const DeclContext *getUsedContext() const { return UsedContext; } DeclContext *getUsedContext() { return UsedContext; } @@ -1322,10 +1356,13 @@ class CXXDefaultArgExpr final : public Expr { /// is implicitly used in a mem-initializer-list in a constructor /// (C++11 [class.base.init]p8) or in aggregate initialization /// (C++1y [dcl.init.aggr]p7). -class CXXDefaultInitExpr : public Expr { - friend class ASTReader; - friend class ASTStmtReader; +class CXXDefaultInitExpr final + : public Expr, + private llvm::TrailingObjects { + friend class ASTStmtReader; + friend class ASTReader; + friend TrailingObjects; /// The field whose default is being used. FieldDecl *Field; @@ -1333,16 +1370,29 @@ class CXXDefaultInitExpr : public Expr { DeclContext *UsedContext; CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, QualType Ty, DeclContext *UsedContext); + FieldDecl *Field, QualType Ty, DeclContext *UsedContext, + Expr *RewrittenInitExpr); + + CXXDefaultInitExpr(EmptyShell Empty, bool HasRewrittenInit) + : Expr(CXXDefaultInitExprClass, Empty) { + CXXDefaultInitExprBits.HasRewrittenInit = HasRewrittenInit; + } - CXXDefaultInitExpr(EmptyShell Empty) : Expr(CXXDefaultInitExprClass, Empty) {} + size_t numTrailingObjects() const { + return CXXDefaultInitExprBits.HasRewrittenInit; + } public: + static CXXDefaultInitExpr *CreateEmpty(const ASTContext &C, + bool HasRewrittenInit); /// \p Field is the non-static data member whose default initializer is used /// by this expression. 
static CXXDefaultInitExpr *Create(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, DeclContext *UsedContext) { - return new (Ctx) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), UsedContext); + FieldDecl *Field, DeclContext *UsedContext, + Expr *RewrittenInitExpr); + + bool hasRewrittenInit() const { + return CXXDefaultInitExprBits.HasRewrittenInit; } /// Get the field whose initializer will be used. @@ -1350,13 +1400,23 @@ class CXXDefaultInitExpr : public Expr { const FieldDecl *getField() const { return Field; } /// Get the initialization expression that will be used. + Expr *getExpr(); const Expr *getExpr() const { - assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); - return Field->getInClassInitializer(); + return const_cast(this)->getExpr(); + } + + /// Retrieve the initializing expression with evaluated immediate calls, if + /// any. + const Expr *getRewrittenExpr() const { + assert(hasRewrittenInit() && "expected a rewritten init expression"); + return *getTrailingObjects(); } - Expr *getExpr() { - assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); - return Field->getInClassInitializer(); + + /// Retrieve the initializing expression with evaluated immediate calls, if + /// any. + Expr *getRewrittenExpr() { + assert(hasRewrittenInit() && "expected a rewritten init expression"); + return *getTrailingObjects(); } const DeclContext *getUsedContext() const { return UsedContext; } diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index 49a66a1ea5b86..a894111be896a 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -686,6 +686,9 @@ class alignas(void *) Stmt { unsigned : NumExprBits; + /// Whether this CXXDefaultArgExpr rewrote its argument and stores a copy. + unsigned HasRewrittenInit : 1; + /// The location where the default argument expression was used. 
SourceLocation Loc; }; @@ -696,6 +699,10 @@ class alignas(void *) Stmt { unsigned : NumExprBits; + /// Whether this CXXDefaultInitExprBitfields rewrote its argument and stores + /// a copy. + unsigned HasRewrittenInit : 1; + /// The location where the default initializer expression was used. SourceLocation Loc; }; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 1b1db765fa7a9..a720da687550a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2644,6 +2644,10 @@ def err_invalid_consteval_take_address : Error< " of an immediate invocation">; def err_invalid_consteval_call : Error< "call to consteval function %q0 is not a constant expression">; +def note_invalid_consteval_initializer : Note< + "in the default initalizer of %0">; +def note_invalid_consteval_initializer_here : Note< + "initialized here %0">; def err_invalid_consteval_decl_kind : Error< "%0 cannot be declared consteval">; def err_invalid_constexpr : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e8c9cb966bae7..f5151ac7f4c3e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1330,6 +1330,25 @@ class Sema final { bool InDiscardedStatement; bool InImmediateFunctionContext; + bool IsCurrentlyCheckingDefaultArgumentOrInitializer = false; + + // When evaluating immediate functions in the initializer of a default + // argument or default member initializer, this is the declaration whose + // default initializer is being evaluated and the location of the call + // or constructor definition. 
+ struct InitializationContext { + InitializationContext(SourceLocation Loc, ValueDecl *Decl, + DeclContext *Context) + : Loc(Loc), Decl(Decl), Context(Context) { + assert(Decl && Context && "invalid initialization context"); + }; + + SourceLocation Loc; + ValueDecl *Decl = nullptr; + DeclContext *Context = nullptr; + }; + llvm::Optional DelayedDefaultInitializationContext; + ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context, unsigned NumCleanupObjects, CleanupInfo ParentCleanup, @@ -6196,19 +6215,22 @@ class Sema final { bool IsStdInitListInitialization, bool RequiresZeroInit, unsigned ConstructKind, SourceRange ParenRange); + ExprResult ConvertMemberDefaultInitExpression(FieldDecl *FD, Expr *InitExpr, + SourceLocation InitLoc); + ExprResult BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field); /// Instantiate or parse a C++ default argument expression as necessary. /// Return true on error. bool CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param); + ParmVarDecl *Param, Expr *Init = nullptr, + bool SkipImmediateInvocations = true); /// BuildCXXDefaultArgExpr - Creates a CXXDefaultArgExpr, instantiating /// the default expr if needed. - ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, - FunctionDecl *FD, - ParmVarDecl *Param); + ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, + ParmVarDecl *Param, Expr *Init = nullptr); /// FinalizeVarWithDestructor - Prepare for calling destructor on the /// constructed variable. 
@@ -9612,6 +9634,63 @@ class Sema final { return ExprEvalContexts.back().isImmediateFunctionContext(); } + bool isCheckingDefaultArgumentOrInitializer() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + const ExpressionEvaluationContextRecord &Ctx = ExprEvalContexts.back(); + return (Ctx.Context == + ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || + Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer; + } + + bool isCheckingDefaultArgumentOrInitializerOfOuterEntity() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { + if ((Ctx.Context == + ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || + Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer) + return true; + if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || + Ctx.isUnevaluated()) + return false; + } + return false; + } + + llvm::Optional + InnermostDeclarationWithDelayedImmediateInvocations() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { + if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && + Ctx.DelayedDefaultInitializationContext) + return Ctx.DelayedDefaultInitializationContext; + if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || + Ctx.isUnevaluated()) + break; + } + return llvm::None; + } + + llvm::Optional + OutermostDeclarationWithDelayedImmediateInvocations() const { + assert(!ExprEvalContexts.empty() && + "Must be in an expression evaluation context"); + llvm::Optional + Res; + for (auto &Ctx : llvm::reverse(ExprEvalContexts)) { + if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && + !Ctx.DelayedDefaultInitializationContext && Res) + break; + if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || + Ctx.isUnevaluated()) + break; + Res = 
Ctx.DelayedDefaultInitializationContext; + } + return Res; + } + /// RAII class used to determine whether SFINAE has /// trapped any errors that occur during template argument /// deduction. diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 631dfaebabbd6..188c0f593a9ef 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -7687,9 +7687,16 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (Error Err = ImportDefaultArgOfParmVarDecl(*FromParam, ToParam)) return std::move(Err); } - + Expr *RewrittenInit = nullptr; + if (E->hasRewrittenInit()) { + ExpectedExpr ExprOrErr = import(E->getExpr()); + if (!ExprOrErr) + return ExprOrErr.takeError(); + RewrittenInit = ExprOrErr.get(); + } return CXXDefaultArgExpr::Create(Importer.getToContext(), *ToUsedLocOrErr, - *ToParamOrErr, *UsedContextOrErr); + *ToParamOrErr, RewrittenInit, + *UsedContextOrErr); } ExpectedStmt @@ -8381,8 +8388,16 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { ToField->setInClassInitializer(*ToInClassInitializerOrErr); } + Expr *RewrittenInit = nullptr; + if (E->hasRewrittenInit()) { + ExpectedExpr ExprOrErr = import(E->getExpr()); + if (!ExprOrErr) + return ExprOrErr.takeError(); + RewrittenInit = ExprOrErr.get(); + } + return CXXDefaultInitExpr::Create(Importer.getToContext(), *ToBeginLocOrErr, - ToField, *UsedContextOrErr); + ToField, *UsedContextOrErr, RewrittenInit); } ExpectedStmt ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 04808643ab84a..659d4f74d7042 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2889,8 +2889,7 @@ Expr *ParmVarDecl::getDefaultArg() { Expr *Arg = getInit(); if (auto *E = dyn_cast_or_null(Arg)) - if (!isa(E)) - return E->getSubExpr(); + return E->getSubExpr(); return Arg; } diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 
3bf3eab72846c..6a6f692dec787 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -949,9 +949,43 @@ const IdentifierInfo *UserDefinedLiteral::getUDSuffix() const { return cast(getCalleeDecl())->getLiteralIdentifier(); } +CXXDefaultArgExpr *CXXDefaultArgExpr::CreateEmpty(const ASTContext &C, + bool HasRewrittenInit) { + size_t Size = totalSizeToAlloc(HasRewrittenInit); + auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); + return new (Mem) CXXDefaultArgExpr(EmptyShell(), HasRewrittenInit); +} + +CXXDefaultArgExpr *CXXDefaultArgExpr::Create(const ASTContext &C, + SourceLocation Loc, + ParmVarDecl *Param, + Expr *RewrittenExpr, + DeclContext *UsedContext) { + size_t Size = totalSizeToAlloc(RewrittenExpr != nullptr); + auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); + return new (Mem) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, + RewrittenExpr, UsedContext); +} + +Expr *CXXDefaultArgExpr::getExpr() { + return CXXDefaultArgExprBits.HasRewrittenInit ? getAdjustedRewrittenExpr() + : getParam()->getDefaultArg(); +} + +Expr *CXXDefaultArgExpr::getAdjustedRewrittenExpr() { + assert(hasRewrittenInit() && + "expected this CXXDefaultArgExpr to have a rewritten init."); + Expr *Init = getRewrittenExpr(); + if (auto *E = dyn_cast_if_present(Init)) + if (!isa(E)) + return E->getSubExpr(); + return Init; +} + CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, FieldDecl *Field, - QualType Ty, DeclContext *UsedContext) + QualType Ty, DeclContext *UsedContext, + Expr *RewrittenInitExpr) : Expr(CXXDefaultInitExprClass, Ty.getNonLValueExprType(Ctx), Ty->isLValueReferenceType() ? VK_LValue : Ty->isRValueReferenceType() ? 
VK_XValue @@ -959,11 +993,43 @@ CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, /*FIXME*/ OK_Ordinary), Field(Field), UsedContext(UsedContext) { CXXDefaultInitExprBits.Loc = Loc; + CXXDefaultInitExprBits.HasRewrittenInit = RewrittenInitExpr != nullptr; + + if (CXXDefaultInitExprBits.HasRewrittenInit) + *getTrailingObjects() = RewrittenInitExpr; + assert(Field->hasInClassInitializer()); setDependence(computeDependence(this)); } +CXXDefaultInitExpr *CXXDefaultInitExpr::CreateEmpty(const ASTContext &C, + bool HasRewrittenInit) { + size_t Size = totalSizeToAlloc(HasRewrittenInit); + auto *Mem = C.Allocate(Size, alignof(CXXDefaultInitExpr)); + return new (Mem) CXXDefaultInitExpr(EmptyShell(), HasRewrittenInit); +} + +CXXDefaultInitExpr *CXXDefaultInitExpr::Create(const ASTContext &Ctx, + SourceLocation Loc, + FieldDecl *Field, + DeclContext *UsedContext, + Expr *RewrittenInitExpr) { + + size_t Size = totalSizeToAlloc(RewrittenInitExpr != nullptr); + auto *Mem = Ctx.Allocate(Size, alignof(CXXDefaultArgExpr)); + return new (Mem) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), + UsedContext, RewrittenInitExpr); +} + +Expr *CXXDefaultInitExpr::getExpr() { + assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); + if (hasRewrittenInit()) + return getRewrittenExpr(); + + return Field->getInClassInitializer(); +} + CXXTemporary *CXXTemporary::Create(const ASTContext &C, const CXXDestructorDecl *Destructor) { return new (C) CXXTemporary(Destructor); diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index d918ea26b9d9d..3a7f5426d4a70 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -648,6 +648,11 @@ void Parser::ParseLexedMemberInitializer(LateParsedMemberInitializer &MI) { Actions.ActOnStartCXXInClassMemberInitializer(); + // The initializer isn't actually potentially evaluated unless it is + // used. 
+ EnterExpressionEvaluationContext Eval( + Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed); + ExprResult Init = ParseCXXMemberInitializer(MI.Field, /*IsFunction=*/false, EqualLoc); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index bbffff5394f04..a2f07ea5d59fc 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -3184,7 +3184,11 @@ ExprResult Parser::ParseCXXMemberInitializer(Decl *D, bool IsFunction, "Data member initializer not starting with '=' or '{'"); EnterExpressionEvaluationContext Context( - Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, D); + Actions, + isa_and_present(D) + ? Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed + : Sema::ExpressionEvaluationContext::PotentiallyEvaluated, + D); if (TryConsumeToken(tok::equal, EqualLoc)) { if (Tok.is(tok::kw_delete)) { // In principle, an initializer of '= delete p;' is legal, but it will diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index ea7997b347959..c78ce37f372a0 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -4039,6 +4039,21 @@ ExprResult Sema::ActOnRequiresClause(ExprResult ConstraintExpr) { return ConstraintExpr; } +ExprResult Sema::ConvertMemberDefaultInitExpression(FieldDecl *FD, + Expr *InitExpr, + SourceLocation InitLoc) { + InitializedEntity Entity = + InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); + InitializationKind Kind = + FD->getInClassInitStyle() == ICIS_ListInit + ? 
InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), + InitExpr->getBeginLoc(), + InitExpr->getEndLoc()) + : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); + InitializationSequence Seq(*this, Entity, Kind, InitExpr); + return Seq.Perform(*this, Entity, Kind, InitExpr); +} + /// This is invoked after parsing an in-class initializer for a /// non-static C++ class member, and after instantiating an in-class initializer /// in a class template. Such actions are deferred until the class is complete. @@ -4067,16 +4082,7 @@ void Sema::ActOnFinishCXXInClassMemberInitializer(Decl *D, ExprResult Init = InitExpr; if (!FD->getType()->isDependentType() && !InitExpr->isTypeDependent()) { - InitializedEntity Entity = - InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); - InitializationKind Kind = - FD->getInClassInitStyle() == ICIS_ListInit - ? InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), - InitExpr->getBeginLoc(), - InitExpr->getEndLoc()) - : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); - InitializationSequence Seq(*this, Entity, Kind, InitExpr); - Init = Seq.Perform(*this, Entity, Kind, InitExpr); + Init = ConvertMemberDefaultInitExpression(FD, InitExpr, InitLoc); if (Init.isInvalid()) { FD->setInvalidDecl(); return; @@ -15617,70 +15623,6 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType, Constructor); } -ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - assert(Field->hasInClassInitializer()); - - // If we already have the in-class initializer nothing needs to be done. - if (Field->getInClassInitializer()) - return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); - - // If we might have already tried and failed to instantiate, don't try again. - if (Field->isInvalidDecl()) - return ExprError(); - - // Maybe we haven't instantiated the in-class initializer. Go check the - // pattern FieldDecl to see if it has one. 
- CXXRecordDecl *ParentRD = cast(Field->getParent()); - - if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { - CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); - DeclContext::lookup_result Lookup = - ClassPattern->lookup(Field->getDeclName()); - - FieldDecl *Pattern = nullptr; - for (auto *L : Lookup) { - if (isa(L)) { - Pattern = cast(L); - break; - } - } - assert(Pattern && "We must have set the Pattern!"); - - if (!Pattern->hasInClassInitializer() || - InstantiateInClassInitializer(Loc, Field, Pattern, - getTemplateInstantiationArgs(Field))) { - // Don't diagnose this again. - Field->setInvalidDecl(); - return ExprError(); - } - return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); - } - - // DR1351: - // If the brace-or-equal-initializer of a non-static data member - // invokes a defaulted default constructor of its class or of an - // enclosing class in a potentially evaluated subexpression, the - // program is ill-formed. - // - // This resolution is unworkable: the exception specification of the - // default constructor can be needed in an unevaluated context, in - // particular, in the operand of a noexcept-expression, and we can be - // unable to compute an exception specification for an enclosed class. - // - // Any attempt to resolve the exception specification of a defaulted default - // constructor before the initializer is lexically complete will ultimately - // come here at which point we can diagnose it. - RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); - Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) - << OutermostClass << Field; - Diag(Field->getEndLoc(), - diag::note_default_member_initializer_not_yet_parsed); - // Recover by marking the field invalid, unless we're in a SFINAE context. 
- if (!isSFINAEContext()) - Field->setInvalidDecl(); - return ExprError(); -} - void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) { if (VD->isInvalidDecl()) return; // If initializing the variable failed, don't also diagnose problems with diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2493b4a76d5e1..51bd0004a389a 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5856,8 +5856,10 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param) { + ParmVarDecl *Param, Expr *RewrittenInit, + bool SkipImmediateInvocations) { if (Param->hasUnparsedDefaultArg()) { + assert(!RewrittenInit && "Should not have a rewritten init expression yet"); // If we've already cleared out the location for the default argument, // that means we're parsing it right now. if (!UnparsedDefaultArgLocs.count(Param)) { @@ -5874,11 +5876,14 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, return true; } - if (Param->hasUninstantiatedDefaultArg() && - InstantiateDefaultArgument(CallLoc, FD, Param)) - return true; + if (Param->hasUninstantiatedDefaultArg()) { + assert(!RewrittenInit && "Should not have a rewitten init expression yet"); + if (InstantiateDefaultArgument(CallLoc, FD, Param)) + return true; + } - assert(Param->hasInit() && "default argument but no initializer?"); + Expr *Init = RewrittenInit ? RewrittenInit : Param->getInit(); + assert(Init && "default argument but no initializer?"); // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll @@ -5887,34 +5892,239 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, // bound temporaries; see the comment in PR5810. 
// We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. - if (auto Init = dyn_cast(Param->getInit())) { + if (auto *InitWithCleanup = dyn_cast(Init)) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. - Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); - + Cleanup.setExprNeedsCleanups(InitWithCleanup->cleanupsHaveSideEffects()); // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. - assert(!Init->getNumObjects() && + assert(!InitWithCleanup->getNumObjects() && "default argument expression has capturing blocks?"); } - - // We already type-checked the argument, so we know it works. - // Just mark all of the declarations in this potentially-evaluated expression - // as being "referenced". EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); - MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), - /*SkipLocalVariables=*/true); + ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer = + SkipImmediateInvocations; + MarkDeclarationsReferencedInExpr(Init, /*SkipLocalVariables*/ true); return false; } +struct ImmediateCallVisitor : public RecursiveASTVisitor { + bool HasImmediateCalls = false; + + bool VisitCallExpr(CallExpr *E) { + if (const FunctionDecl *FD = E->getDirectCallee()) + HasImmediateCalls |= FD->isConsteval(); + return RecursiveASTVisitor::VisitStmt(E); + } + + // SourceLocExpr are not immediate invocations + // but CXXDefaultInitExpr/CXXDefaultArgExpr containing a SourceLocExpr + // need to be rebuilt so that they refer to the correct SourceLocation and + // DeclContext. 
+ bool VisitSourceLocExpr(SourceLocExpr *E) { + HasImmediateCalls = true; + return RecursiveASTVisitor::VisitStmt(E); + } + + // A nested lambda might have parameters with immediate invocations + // in their default arguments. + // The compound statement is not visited (as it does not constitute a + // subexpression). + // FIXME: We should consider visiting and transforming captures + // with init expressions. + bool VisitLambdaExpr(LambdaExpr *E) { + return VisitCXXMethodDecl(E->getCallOperator()); + } + + // Blocks don't support default parameters, and, as for lambdas, + // we don't consider their body a subexpression. + bool VisitBlockDecl(BlockDecl *B) { return false; } + + bool VisitCompoundStmt(CompoundStmt *B) { + assert("Unexpected Compound statement in default parameter or initializer"); + return false; + } + + bool VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { + return TraverseStmt(E->getExpr()); + } + + bool VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { + return TraverseStmt(E->getExpr()); + } +}; + +struct EnsureImmediateInvocationInDefaultArgs + : TreeTransform { + EnsureImmediateInvocationInDefaultArgs(Sema &SemaRef) + : TreeTransform(SemaRef) {} + + // Lambda can only have immediate invocations in the default + // args of their parameters, which is transformed upon calling the closure. + // The body is not a subexpression, so we have nothing to do. + // FIXME: Immediate calls in capture initializers should be transformed. 
+ ExprResult TransformLambdaExpr(LambdaExpr *E) { return E; } + ExprResult TransformBlockExpr(BlockExpr *E) { return E; } +}; + ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, - FunctionDecl *FD, ParmVarDecl *Param) { + FunctionDecl *FD, ParmVarDecl *Param, + Expr *Init) { assert(Param->hasDefaultArg() && "can't build nonexistent default arg"); - if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) + + bool NestedDefaultChecking = + isCheckingDefaultArgumentOrInitializerOfOuterEntity(); + + llvm::Optional + InitializationContext = + OutermostDeclarationWithDelayedImmediateInvocations(); + if (!InitializationContext.has_value()) + InitializationContext.emplace(CallLoc, Param, CurContext); + + if (!Init && !Param->hasUnparsedDefaultArg()) { + // Mark that we are replacing a default argument first. + // If we are instantiating a template we won't have to + // retransform immediate calls. + EnterExpressionEvaluationContext EvalContext( + *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); + ExprEvalContexts.back().DelayedDefaultInitializationContext = { + CallLoc, Param, CurContext}; + + if (Param->hasUninstantiatedDefaultArg()) { + if (InstantiateDefaultArgument(CallLoc, FD, Param)) + return ExprError(); + } else { + // CWG2631 + // An immediate invocation that is not evaluated where it appears is + // evaluated and checked for whether it is a constant expression at the + // point where the enclosing initializer is used in a function call. 
+ ImmediateCallVisitor V; + if (!NestedDefaultChecking) + V.TraverseDecl(Param); + if (V.HasImmediateCalls) { + EnsureImmediateInvocationInDefaultArgs Immediate(*this); + ExprResult Res = Immediate.TransformExpr(Param->getInit()); + if (Res.isInvalid()) + return ExprError(); + Res = ConvertParamDefaultArgument(Param, Res.get(), + Res.get()->getBeginLoc()); + if (Res.isInvalid()) + return ExprError(); + Init = Res.get(); + } + } + } + + if (CheckCXXDefaultArgExpr( + CallLoc, FD, Param, Init, + /*SkipImmediateInvocations=*/NestedDefaultChecking)) return ExprError(); - return CXXDefaultArgExpr::Create(Context, CallLoc, Param, CurContext); + + return CXXDefaultArgExpr::Create(Context, InitializationContext->Loc, Param, + Init, InitializationContext->Context); +} + +ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { + assert(Field->hasInClassInitializer()); + + // If we might have already tried and failed to instantiate, don't try again. + if (Field->isInvalidDecl()) + return ExprError(); + + auto *ParentRD = cast(Field->getParent()); + + llvm::Optional + InitializationContext = + OutermostDeclarationWithDelayedImmediateInvocations(); + if (!InitializationContext.has_value()) + InitializationContext.emplace(Loc, Field, CurContext); + + Expr *Init = nullptr; + + bool NestedDefaultChecking = + isCheckingDefaultArgumentOrInitializerOfOuterEntity(); + + if (!Field->getInClassInitializer()) { + // Maybe we haven't instantiated the in-class initializer. Go check the + // pattern FieldDecl to see if it has one. 
+ if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { + CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); + DeclContext::lookup_result Lookup = + ClassPattern->lookup(Field->getDeclName()); + + FieldDecl *Pattern = nullptr; + for (auto *L : Lookup) { + if ((Pattern = dyn_cast(L))) + break; + } + assert(Pattern && "We must have set the Pattern!"); + if (!Pattern->hasInClassInitializer() || + InstantiateInClassInitializer(Loc, Field, Pattern, + getTemplateInstantiationArgs(Field))) { + Field->setInvalidDecl(); + return ExprError(); + } + } + } else { + // CWG2631 + // An immediate invocation that is not evaluated where it appears is + // evaluated and checked for whether it is a constant expression at the + // point where the enclosing initializer is used in a [...] a constructor + // definition, or an aggregate initialization. + EnterExpressionEvaluationContext EvalContext( + *this, ExpressionEvaluationContext::PotentiallyEvaluated, Field); + ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field, + CurContext}; + + ImmediateCallVisitor V; + if (!NestedDefaultChecking) + V.TraverseDecl(Field); + if (V.HasImmediateCalls) { + EnsureImmediateInvocationInDefaultArgs Immediate(*this); + ExprResult Res = Immediate.TransformExpr(Field->getInClassInitializer()); + if (!Res.isInvalid()) + Res = ConvertMemberDefaultInitExpression(Field, Res.get(), Loc); + if (!Res.isInvalid()) + Res = MaybeCreateExprWithCleanups(Res.get()); + if (Res.isInvalid()) { + Field->setInvalidDecl(); + return ExprError(); + } + Init = Res.get(); + } else if (!NestedDefaultChecking) { + MarkDeclarationsReferencedInExpr(Field->getInClassInitializer()); + } + } + if (Field->getInClassInitializer()) + return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc, + Field, InitializationContext->Context, + Init); + + // DR1351: + // If the brace-or-equal-initializer of a non-static data member + // invokes a defaulted default 
constructor of its class or of an + // enclosing class in a potentially evaluated subexpression, the + // program is ill-formed. + // + // This resolution is unworkable: the exception specification of the + // default constructor can be needed in an unevaluated context, in + // particular, in the operand of a noexcept-expression, and we can be + // unable to compute an exception specification for an enclosed class. + // + // Any attempt to resolve the exception specification of a defaulted default + // constructor before the initializer is lexically complete will ultimately + // come here at which point we can diagnose it. + RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); + Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) + << OutermostClass << Field; + Diag(Field->getEndLoc(), + diag::note_default_member_initializer_not_yet_parsed); + // Recover by marking the field invalid, unless we're in a SFINAE context. + if (!isSFINAEContext()) + Field->setInvalidDecl(); + return ExprError(); } Sema::VariadicCallType @@ -17539,6 +17749,7 @@ void Sema::CheckUnusedVolatileAssignment(Expr *E) { ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) { if (isUnevaluatedContext() || !E.isUsable() || !Decl || !Decl->isConsteval() || isConstantEvaluated() || + isCheckingDefaultArgumentOrInitializer() || RebuildingImmediateInvocation || isImmediateFunctionContext()) return E; @@ -17584,8 +17795,14 @@ static void EvaluateAndDiagnoseImmediateInvocation( FD = Call->getConstructor(); else llvm_unreachable("unhandled decl kind"); - assert(FD->isConsteval()); + assert(FD && FD->isConsteval()); SemaRef.Diag(CE->getBeginLoc(), diag::err_invalid_consteval_call) << FD; + if (auto Context = + SemaRef.InnermostDeclarationWithDelayedImmediateInvocations()) { + SemaRef.Diag(Context->Loc, diag::note_invalid_consteval_initializer) + << Context->Decl; + SemaRef.Diag(Context->Decl->getBeginLoc(), diag::note_declared_at); + } for (auto &Note 
: Notes) SemaRef.Diag(Note.first, Note.second); return; @@ -19731,7 +19948,8 @@ void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { if (auto *FD = dyn_cast(E->getDecl())) if (!isUnevaluatedContext() && !isConstantEvaluated() && - !isImmediateFunctionContext() && FD->isConsteval() && + !isImmediateFunctionContext() && + !isCheckingDefaultArgumentOrInitializer() && FD->isConsteval() && !RebuildingImmediateInvocation && !FD->isDependentContext()) ExprEvalContexts.back().ReferenceToConsteval.insert(E); MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse, diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 9e41dfbfdbe95..709162e01809b 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1978,9 +1978,9 @@ ExprResult TemplateInstantiator::TransformCXXDefaultArgExpr( assert(!cast(E->getParam()->getDeclContext())-> getDescribedFunctionTemplate() && "Default arg expressions are never formed in dependent cases."); - return SemaRef.BuildCXXDefaultArgExpr(E->getUsedLocation(), - cast(E->getParam()->getDeclContext()), - E->getParam()); + return SemaRef.BuildCXXDefaultArgExpr( + E->getUsedLocation(), cast(E->getParam()->getDeclContext()), + E->getParam()); } template @@ -3407,6 +3407,8 @@ bool Sema::InstantiateInClassInitializer( ContextRAII SavedContext(*this, Instantiation->getParent()); EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + ExprEvalContexts.back().DelayedDefaultInitializationContext = { + PointOfInstantiation, Instantiation, CurContext}; LocalInstantiationScope Scope(*this, true); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index ab34a9d611b9c..ead72463aca78 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3172,9 +3172,10 @@ class TreeTransform { /// By default, builds a new default-argument 
expression, which does not /// require any semantic analysis. Subclasses may override this routine to /// provide different behavior. - ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param) { + ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param, + Expr *RewrittenExpr) { return CXXDefaultArgExpr::Create(getSema().Context, Loc, Param, - getSema().CurContext); + RewrittenExpr, getSema().CurContext); } /// Build a new C++11 default-initialization expression. @@ -3184,8 +3185,7 @@ class TreeTransform { /// routine to provide different behavior. ExprResult RebuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - return CXXDefaultInitExpr::Create(getSema().Context, Loc, Field, - getSema().CurContext); + return getSema().BuildCXXDefaultInitExpr(Loc, Field); } /// Build a new C++ zero-initialization expression. @@ -12094,11 +12094,20 @@ TreeTransform::TransformCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (!Param) return ExprError(); + ExprResult InitRes; + if (E->hasRewrittenInit()) { + InitRes = getDerived().TransformExpr(E->getRewrittenExpr()); + if (InitRes.isInvalid()) + return ExprError(); + } + if (!getDerived().AlwaysRebuild() && Param == E->getParam() && - E->getUsedContext() == SemaRef.CurContext) + E->getUsedContext() == SemaRef.CurContext && + InitRes.get() == E->getRewrittenExpr()) return E; - return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param); + return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param, + InitRes.get()); } template diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 2a3c6e7231785..08f9f0bf50d03 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1824,6 +1824,9 @@ void ASTStmtReader::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { E->Param = readDeclAs(); E->UsedContext = readDeclAs(); E->CXXDefaultArgExprBits.Loc = readSourceLocation(); + 
E->CXXDefaultArgExprBits.HasRewrittenInit = Record.readInt(); + if (E->CXXDefaultArgExprBits.HasRewrittenInit) + *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { @@ -1831,6 +1834,9 @@ void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { E->Field = readDeclAs(); E->UsedContext = readDeclAs(); E->CXXDefaultInitExprBits.Loc = readSourceLocation(); + E->CXXDefaultInitExprBits.HasRewrittenInit = Record.readInt(); + if (E->CXXDefaultInitExprBits.HasRewrittenInit) + *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { @@ -3829,11 +3835,13 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_DEFAULT_ARG: - S = new (Context) CXXDefaultArgExpr(Empty); + S = CXXDefaultArgExpr::CreateEmpty( + Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); break; case EXPR_CXX_DEFAULT_INIT: - S = new (Context) CXXDefaultInitExpr(Empty); + S = CXXDefaultInitExpr::CreateEmpty( + Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); break; case EXPR_CXX_BIND_TEMPORARY: diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index e2ba69ca1eec8..6e4101ac122ee 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1745,6 +1745,9 @@ void ASTStmtWriter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Record.AddDeclRef(E->getParam()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); Record.AddSourceLocation(E->getUsedLocation()); + Record.push_back(E->hasRewrittenInit()); + if (E->hasRewrittenInit()) + Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_ARG; } @@ -1753,6 +1756,9 @@ void ASTStmtWriter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { Record.AddDeclRef(E->getField()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); 
Record.AddSourceLocation(E->getExprLoc()); + Record.push_back(E->hasRewrittenInit()); + if (E->hasRewrittenInit()) + Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_INIT; } diff --git a/clang/test/CXX/class/class.local/p1-0x.cpp b/clang/test/CXX/class/class.local/p1-0x.cpp index 49125f5f9b062..096f5080099ec 100644 --- a/clang/test/CXX/class/class.local/p1-0x.cpp +++ b/clang/test/CXX/class/class.local/p1-0x.cpp @@ -11,8 +11,8 @@ void f() { int x = 3; // expected-note{{'x' declared here}} struct C { int& x2 = x; // expected-error{{reference to local variable 'x' declared in enclosing lambda expression}} - }; + }c; // expected-note {{required here}} }; - C(); + C(); // expected-note {{required here}} } diff --git a/clang/test/CodeGenCXX/builtin-source-location.cpp b/clang/test/CodeGenCXX/builtin-source-location.cpp index 6e44e6b0e60e3..7af6749d0d6d6 100644 --- a/clang/test/CodeGenCXX/builtin-source-location.cpp +++ b/clang/test/CodeGenCXX/builtin-source-location.cpp @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -no-opaque-pointers -std=c++2a -fblocks %s -triple x86_64-unknown-unknown -emit-llvm -o %t.ll +// RUN: %clang_cc1 -no-opaque-pointers -std=c++14 -fblocks %s -triple x86_64-unknown-unknown -emit-llvm -o %t.ll + // This needs to be performed before #line directives which alter filename // RUN: %clang_cc1 -no-opaque-pointers -fno-file-reproducible -fmacro-prefix-map=%p=/UNLIKELY/PATH -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-PREFIX-MAP diff --git a/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp b/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp new file mode 100644 index 0000000000000..54a02ffc06836 --- /dev/null +++ b/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -std=c++2a -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s + +consteval int immediate() { return 0;} +static int ext(); +void f(int a = immediate() + ext()); + +void test_function() { + f(); + 
f(0); + // CHECK: call noundef i32 @_ZL3extv() + // CHECK: add + // CHECK: call {{.*}} @_Z1fi + // CHECK: call {{.*}} @_Z1fi +} + +// CHECK: define {{.*}} i32 @_ZL3extv() + +static constexpr int not_immediate(); +struct A { + int a = immediate() + not_immediate(); +}; + +void test_member() { + // CHECK: call void @_ZN1AC2Ev + A defaulted; + // CHECK-NOT: call void @_ZN1AC2Ev + A provided{0}; +} + +// CHECK: define {{.*}} void @_ZN1AC2Ev{{.*}} +// CHECK: %call = call noundef i32 @_ZL13not_immediatev() + +int never_referenced() {return 42;}; + + +namespace not_used { + +struct A { + int a = immediate() + never_referenced(); +}; +void f(int a = immediate() + never_referenced()); + +void g() { + A a{0}; + f(0); +} + +} + +static int ext() {return 0;} +static constexpr int not_immediate() {return 0;} + +// CHECK-NOT: define {{.*}} i32 _ZL16never_referencedv()( +// CHECK: define {{.*}} i32 @_ZL13not_immediatev() diff --git a/clang/test/PCH/default-argument-with-immediate-calls.cpp b/clang/test/PCH/default-argument-with-immediate-calls.cpp new file mode 100644 index 0000000000000..510605a23d4e7 --- /dev/null +++ b/clang/test/PCH/default-argument-with-immediate-calls.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -std=c++20 -emit-pch %s -o %t +// RUN: %clang_cc1 -std=c++20 -include-pch %t -verify %s +// expected-no-diagnostics + +#ifndef HEADER_INCLUDED +#define HEADER_INCLUDED + +consteval int immediate(); +int regular_function() { + return 0; +} + +struct S { + int a = immediate() + regular_function(); +}; + +int f(int arg = immediate()) { + return arg; +} + +#else + +consteval int immediate() { + return 0; +} + +void test() { + f(0); + f(); + S s{0}; + S t{0}; +} + +#endif diff --git a/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp b/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp new file mode 100644 index 0000000000000..511306e0d921a --- /dev/null +++ b/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -fsyntax-only 
-verify -std=c++20 %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2b %s + +consteval int undefined(); // expected-note 4 {{declared here}} + +void check_lambdas_unused( + int a = [] + { + // The body of a lambda is not a subexpression of the lambda + // so this is immediately evaluated even if the parameter + // is never used. + return undefined(); // expected-error {{not a constant expression}} \ + // expected-note {{undefined function 'undefined'}} + }(), + int b = [](int no_error = undefined()) { + return no_error; + }(0), + int c = [](int defaulted = undefined()) { + return defaulted; + }() +) {} + +int check_lambdas_used( + int b = [](int no_error = undefined()) { + return no_error; + }(0), + int c = [](int defaulted = undefined()) { // expected-error {{not a constant expression}} \ + // expected-note {{declared here}} \ + // expected-note {{undefined function 'undefined'}} + return defaulted; + }(), // expected-note {{in the default initalizer of 'defaulted'}} + int d = [](int defaulted = sizeof(undefined())) { + return defaulted; + }() +) { + return 0; +} + +int test_check_lambdas_used = check_lambdas_used(); + +struct UnusedInitWithLambda { + int a = [] { + return undefined(); // expected-error {{not a constant expression}} \ + // expected-note {{undefined function 'undefined'}} + }(); + // UnusedInitWithLambda is never constructed, so the initializer + // of b and undefined() are never evaluated. 
+ int b = [](int no_error = undefined()) { + return no_error; + }(); +}; + +consteval int ub(int n) { + return 0/n; // expected-note {{division}} +} + +struct InitWithLambda { + int b = [](int error = undefined()) { // expected-error {{not a constant expression}} \ + // expected-note {{declared here}} \ + // expected-note {{undefined function 'undefined'}} + return error; + }(); // expected-note {{in the default initalizer of 'error'}} + int c = [](int error = sizeof(undefined()) + ub(0)) { // expected-error {{'ub' is not a constant expression}} \ + // expected-note {{declared here}} \ + // expected-note {{in call to 'ub(0)}} + return error; + }(); // expected-note {{in the default initalizer of 'error'}} +} i; // expected-note {{in implicit default constructor}} diff --git a/clang/test/SemaCXX/source_location.cpp b/clang/test/SemaCXX/source_location.cpp index ccb385f60dc4b..9cfe9207dd14d 100644 --- a/clang/test/SemaCXX/source_location.cpp +++ b/clang/test/SemaCXX/source_location.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fexceptions -verify %s +// RUN: %clang_cc1 -std=c++2a -fcxx-exceptions -DUSE_CONSTEVAL -fexceptions -verify %s // expected-no-diagnostics #define assert(...) ((__VA_ARGS__) ? 
((void)0) : throw 42) @@ -8,15 +9,22 @@ template struct Printer; +#ifdef USE_CONSTEVAL +#define SOURCE_LOC_EVAL_KIND consteval +#else +#define SOURCE_LOC_EVAL_KIND constexpr +#endif + namespace std { class source_location { struct __impl; public: - static constexpr source_location current(const __impl *__p = __builtin_source_location()) noexcept { - source_location __loc; - __loc.__m_impl = __p; - return __loc; + static SOURCE_LOC_EVAL_KIND source_location + current(const __impl *__p = __builtin_source_location()) noexcept { + source_location __loc; + __loc.__m_impl = __p; + return __loc; } constexpr source_location() = default; constexpr source_location(source_location const &) = default; @@ -593,3 +601,51 @@ namespace TestConstexprContext { } static_assert(test()); } + +namespace Lambda { +#line 8000 "TestLambda.cpp" +constexpr int nested_lambda(int l = []{ + return SL::current().line(); +}()) { + return l; +} +static_assert(nested_lambda() == __LINE__ - 4); + +constexpr int lambda_param(int l = [](int l = SL::current().line()) { + return l; +}()) { + return l; +} +static_assert(lambda_param() == __LINE__); + + +} + +constexpr int compound_literal_fun(int a = + (int){ SL::current().line() } +) { return a ;} +static_assert(compound_literal_fun() == __LINE__); + +struct CompoundLiteral { + int a = (int){ SL::current().line() }; +}; +static_assert(CompoundLiteral{}.a == __LINE__); + + +// FIXME +// Init captures are subexpressions of the lambda expression +// so according to the standard immediate invocations in init captures +// should be evaluated at the call site. +// However Clang does not yet implement this as it would introduce +// a fair bit of complexity. +// We intend to implement that functionality once we find real world +// use cases that require it. 
+constexpr int test_init_capture(int a = + [b = SL::current().line()] { return b; }()) { + return a; +} +#ifdef USE_CONSTEVAL +static_assert(test_init_capture() == __LINE__ - 4); +#else +static_assert(test_init_capture() == __LINE__ ); +#endif From 4b604cc27e28a0edcb22a5ae5f29b8ff3bd0eacf Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Fri, 4 Nov 2022 14:59:37 +0100 Subject: [PATCH 244/516] [flang] Update fir.select_type op to fit design document Update the already existing fir.select_type operation to reflect decisions made in the polymorphic entities design document. The verifier is updated to check that the select is polymorphic. The case attributes name are changed and some tests are added. Reviewed By: jeanPerier Differential Revision: https://reviews.llvm.org/D137403 --- .../include/flang/Optimizer/Dialect/FIRAttr.h | 4 ++-- .../include/flang/Optimizer/Dialect/FIROps.td | 8 ++++---- flang/lib/Optimizer/Dialect/FIROps.cpp | 7 +++++-- flang/test/Fir/convert-to-llvm-invalid.fir | 8 ++++---- flang/test/Fir/fir-ops.fir | 8 ++++---- flang/test/Fir/invalid.fir | 19 +++++++++++++++++++ 6 files changed, 38 insertions(+), 16 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.h b/flang/include/flang/Optimizer/Dialect/FIRAttr.h index 92b3f7a8e6f62..f88d6c6a4f97f 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.h +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.h @@ -38,7 +38,7 @@ class ExactTypeAttr using Base::Base; using ValueType = mlir::Type; - static constexpr llvm::StringRef getAttrName() { return "instance"; } + static constexpr llvm::StringRef getAttrName() { return "type_is"; } static ExactTypeAttr get(mlir::Type value); mlir::Type getType() const; @@ -51,7 +51,7 @@ class SubclassAttr using Base::Base; using ValueType = mlir::Type; - static constexpr llvm::StringRef getAttrName() { return "subsumed"; } + static constexpr llvm::StringRef getAttrName() { return "class_is"; } static SubclassAttr get(mlir::Type value); 
mlir::Type getType() const; diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 12938525b7062..76a117d78e73e 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -651,10 +651,10 @@ def fir_SelectTypeOp : fir_SwitchTerminatorOp<"select_type"> { ```mlir fir.select_type %arg : !fir.box<()> [ - #fir.instance>, ^bb1(%0 : i32), - #fir.instance>, ^bb2(%2 : i32), - #fir.subsumed>, ^bb3(%2 : i32), - #fir.instance>, ^bb4(%1,%3 : i32,f32), + #fir.type_is>, ^bb1(%0 : i32), + #fir.type_is>, ^bb2(%2 : i32), + #fir.class_is>, ^bb3(%2 : i32), + #fir.type_is>, ^bb4(%1,%3 : i32,f32), unit, ^bb5] ``` }]; diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 450900b351029..86628b792068b 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -2970,8 +2970,11 @@ void fir::SelectTypeOp::print(mlir::OpAsmPrinter &p) { } mlir::LogicalResult fir::SelectTypeOp::verify() { - if (!(getSelector().getType().isa())) - return emitOpError("must be a boxed type"); + if (!(getSelector().getType().isa())) + return emitOpError("must be a fir.class or fir.box type"); + if (auto boxType = getSelector().getType().dyn_cast()) + if (!boxType.getEleTy().isa()) + return emitOpError("selector must be polymorphic"); auto cases = getOperation()->getAttrOfType(getCasesAttr()).getValue(); auto count = getNumDest(); diff --git a/flang/test/Fir/convert-to-llvm-invalid.fir b/flang/test/Fir/convert-to-llvm-invalid.fir index 7cba7fc55b8e3..bdc2525876ac2 100644 --- a/flang/test/Fir/convert-to-llvm-invalid.fir +++ b/flang/test/Fir/convert-to-llvm-invalid.fir @@ -71,14 +71,14 @@ func.func @shape_shift_not_dead(%arg0: !fir.ref>, %i: index, // Test `fir.select_type` conversion to llvm. // Should have been converted. 
-func.func @bar_select_type(%arg : !fir.box>) -> i32 { +func.func @bar_select_type(%arg : !fir.class>) -> i32 { %0 = arith.constant 1 : i32 %2 = arith.constant 3 : i32 // expected-error@+2{{fir.select_type should have already been converted}} // expected-error@+1{{failed to legalize operation 'fir.select_type'}} - fir.select_type %arg : !fir.box> [ - #fir.instance>,^bb1(%0:i32), - #fir.instance>,^bb2(%2:i32), + fir.select_type %arg : !fir.class> [ + #fir.type_is>,^bb1(%0:i32), + #fir.type_is>,^bb2(%2:i32), unit,^bb5 ] ^bb1(%a : i32) : return %a : i32 diff --git a/flang/test/Fir/fir-ops.fir b/flang/test/Fir/fir-ops.fir index 5052a06d90bcd..486c7ee809910 100644 --- a/flang/test/Fir/fir-ops.fir +++ b/flang/test/Fir/fir-ops.fir @@ -322,8 +322,8 @@ func.func @bar_select_rank(%arg : i32, %arg2 : i32) -> i32 { } // CHECK-LABEL: func @bar_select_type( -// CHECK-SAME: [[VAL_101:%.*]]: !fir.box}>>) -> i32 { -func.func @bar_select_type(%arg : !fir.box}>>) -> i32 { +// CHECK-SAME: [[VAL_101:%.*]]: !fir.class}>>) -> i32 { +func.func @bar_select_type(%arg : !fir.class}>>) -> i32 { // CHECK: [[VAL_102:%.*]] = arith.constant 1 : i32 // CHECK: [[VAL_103:%.*]] = arith.constant 2 : i32 @@ -334,8 +334,8 @@ func.func @bar_select_type(%arg : !fir.box}>> [#fir.instance>, ^bb1([[VAL_102]] : i32), #fir.instance>, ^bb2([[VAL_104]] : i32), #fir.subsumed>, ^bb3([[VAL_104]] : i32), #fir.instance>, ^bb4([[VAL_103]] : i32), unit, ^bb5] - fir.select_type %arg : !fir.box}>> [ #fir.instance>,^bb1(%0:i32), #fir.instance>,^bb2(%2:i32), #fir.subsumed>,^bb3(%2:i32), #fir.instance>,^bb4(%1:i32), unit,^bb5 ] +// CHECK: fir.select_type [[VAL_101]] : !fir.class}>> [#fir.type_is>, ^bb1([[VAL_102]] : i32), #fir.type_is>, ^bb2([[VAL_104]] : i32), #fir.class_is>, ^bb3([[VAL_104]] : i32), #fir.type_is>, ^bb4([[VAL_103]] : i32), unit, ^bb5] + fir.select_type %arg : !fir.class}>> [ #fir.type_is>,^bb1(%0:i32), #fir.type_is>,^bb2(%2:i32), #fir.class_is>,^bb3(%2:i32), #fir.type_is>,^bb4(%1:i32), unit,^bb5 ] // 
CHECK: ^bb1([[VAL_106:%.*]]: i32): // CHECK: return [[VAL_106]] : i32 diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index 5d0ac39184211..fdb4249c4ad81 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -928,3 +928,22 @@ func.func @bad_array_declare_unlimited_polymorphic_boxaddr(%arg0: !fir.ref>>>, !fir.shift<2>) -> !fir.ref>>> return } + +// ----- + +func.func @invalid_selector(%arg : !fir.box>) -> i32 { + %0 = arith.constant 1 : i32 + %2 = arith.constant 3 : i32 + // expected-error@+1{{'fir.select_type' op selector must be polymorphic}} + fir.select_type %arg : !fir.box> [ + #fir.type_is>,^bb1(%0:i32), + #fir.type_is>,^bb2(%2:i32), + unit,^bb5 ] +^bb1(%a : i32) : + return %a : i32 +^bb2(%b : i32) : + return %b : i32 +^bb5 : + %zero = arith.constant 0 : i32 + return %zero : i32 +} From 749242668885d86a4f8bd6a5e697250b6cdb36d3 Mon Sep 17 00:00:00 2001 From: Peixin Qiao Date: Fri, 4 Nov 2022 22:15:58 +0800 Subject: [PATCH 245/516] [flang] Fix function result rewrite When the function result is `type(c_ptr/c_funptr)`, and the function has or does not have BIND(C) attribute, the function result is not taken as the first argument of the function call in other compilers such as gfortran and ifort. Fix it to be consistent with gfortran/ifort by changing the abstract result type check. Fix #58739. 
Reviewed By: PeteSteinfeld, jeanPerier Differential Revision: https://reviews.llvm.org/D137236 --- flang/lib/Optimizer/Dialect/FIRType.cpp | 4 ++++ flang/test/Fir/abstract-results.fir | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 67b4d1af7cf17..c509ce0fcdcfb 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -948,6 +948,10 @@ bool fir::hasAbstractResult(mlir::FunctionType ty) { if (ty.getNumResults() == 0) return false; auto resultType = ty.getResult(0); + // FIXME: The interoperable derived type needs more investigations and tests. + // The derived type without BIND attribute may also not be abstract result. + if (fir::isa_builtin_cptr_type(resultType)) + return false; return resultType.isa(); } diff --git a/flang/test/Fir/abstract-results.fir b/flang/test/Fir/abstract-results.fir index 580f7c6d22e13..92d803e4994ba 100644 --- a/flang/test/Fir/abstract-results.fir +++ b/flang/test/Fir/abstract-results.fir @@ -202,6 +202,17 @@ func.func @call_chararrayfunc() { // FUNC-BOX-NOT: fir.save_result } +func.func private @rettcptr() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> attributes {fir.bindc_name = "rettcptr"} + +// FUNC-REF-LABEL: func @_QPtest_return_cptr() { +// FUNC-BOX-LABEL: func @_QPtest_return_cptr() { +func.func @_QPtest_return_cptr() { + // FUNC-REF: [[VAL:.*]] = fir.call @rettcptr() : () -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + // FUNC-BOX: [[VAL:.*]] = fir.call @rettcptr() : () -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + %1 = fir.call @rettcptr() : () -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + return +} + // ------------------------ Test fir.address_of rewrite ------------------------ func.func private @takesfuncarray((i32) -> !fir.array) From fe9409b9fad352bbad7064f6c8be1e1ef0b12586 Mon Sep 17 
00:00:00 2001 From: Peixin Qiao Date: Fri, 4 Nov 2022 22:19:38 +0800 Subject: [PATCH 246/516] [flang][RFC] Change the interface for non-BIND(C) CPTR type with VALUE attribute When the `type(c_ptr/c_funptr)` argument has value attribute in non-BIND(C) procedure, it is passed by VALUE in gfortran. ifort does not do this. Be consistent with gfortran. Fix #58756. Reviewed By: PeteSteinfeld, jeanPerier Differential Revision: https://reviews.llvm.org/D137237 --- flang/lib/Lower/CallInterface.cpp | 8 +++++--- flang/test/Lower/call-by-value.f90 | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index 20258f37a8126..0832f101d4bb2 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -929,13 +929,15 @@ class Fortran::lower::CallInterfaceImpl { PassEntityBy passBy = PassEntityBy::BaseAddress; Property prop = Property::BaseAddress; if (isValueAttr) { + bool isBuiltinCptrType = fir::isa_builtin_cptr_type(type); if (isBindC || (!type.isa() && !obj.attrs.test(Attrs::Optional) && - dynamicType.category() != - Fortran::common::TypeCategory::Derived)) { + (dynamicType.category() != + Fortran::common::TypeCategory::Derived || + isBuiltinCptrType))) { passBy = PassEntityBy::Value; prop = Property::Value; - if (fir::isa_builtin_cptr_type(type)) { + if (isBuiltinCptrType) { auto recTy = type.dyn_cast(); mlir::Type fieldTy = recTy.getTypeList()[0].second; passType = fir::ReferenceType::get(fieldTy); diff --git a/flang/test/Lower/call-by-value.f90 b/flang/test/Lower/call-by-value.f90 index 717da1afd99e5..b9f9dc1a24eff 100644 --- a/flang/test/Lower/call-by-value.f90 +++ b/flang/test/Lower/call-by-value.f90 @@ -73,3 +73,20 @@ subroutine test_char_value(x) bind(c) character(1), value :: x call internal_call4(x) end + +! CHECK-LABEL: func.func @_QPtest_cptr_value( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "x"}) { +! 
CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> +! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> +! CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 +! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref +! CHECK: fir.call @_QPinternal_call5(%[[VAL_1]]) : (!fir.ref>) -> () +! CHECK: return +! CHECK: } + +subroutine test_cptr_value(x) + use iso_c_binding + type(c_ptr), value :: x + call internal_call5(x) +end From 42220c58685e0f20685ba23867b8507280850a7a Mon Sep 17 00:00:00 2001 From: David Truby Date: Tue, 1 Nov 2022 14:12:14 +0000 Subject: [PATCH 247/516] [flang][RFC] Proposal for complex number lowering through MLIR This design document proposes lowering FIR complex number operations through the MLIR complex dialect. Differential Revision: https://reviews.llvm.org/D134364 --- flang/docs/ComplexOperations.md | 76 +++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 flang/docs/ComplexOperations.md diff --git a/flang/docs/ComplexOperations.md b/flang/docs/ComplexOperations.md new file mode 100644 index 0000000000000..6faa1811fd6d0 --- /dev/null +++ b/flang/docs/ComplexOperations.md @@ -0,0 +1,76 @@ +# Complex Operations + +```eval_rst +.. contents:: + :local: +``` + +Fortran includes support for complex number types and a set of operators and +intrinsics that work on these types. Some of those operations are complicated +and require runtime function calls to implement. + +This document outlines a design for generating these operations using the MLIR +complex dialect while avoiding cross-platform ABI issues. + +## FIR Representation + +MLIR contains a complex dialect, similar to the Math dialect also used for +lowering some integer and floating point operations in Flang. 
Conversion between +fir.complex types and MLIR complex types is supported. + +As a result at the FIR level, complex operations can be represented as +conversions from the fir.complex type to the equivalent MLIR complex type, use +of the MLIR operation and a conversion back. + +This is similar to the way the math intrinsics are lowered, as proposed [here][1]. + +**Fortran** +```fortran +function pow_self(c) + complex, intent(in) :: c + complex :: pow_self + pow_self = c ** c +end function pow_self +``` + +**FIR** +```c +func.func @_QPpow_self(%arg0: !fir.ref<!fir.complex<4>>) -> !fir.complex<4> { + %0 = fir.alloca !fir.complex<4> + %1 = fir.load %arg0 : !fir.ref<!fir.complex<4>> + %2 = fir.load %arg0 : !fir.ref<!fir.complex<4>> + %3 = fir.convert %1 : (!fir.complex<4>) -> complex<f32> + %4 = fir.convert %2 : (!fir.complex<4>) -> complex<f32> + %5 = complex.pow %3, %4 : complex<f32> + %6 = fir.convert %5 : (complex<f32>) -> !fir.complex<4> + fir.store %6 to %0 : !fir.ref<!fir.complex<4>> + %7 = fir.load %0 : !fir.ref<!fir.complex<4>> + return %7 : !fir.complex<4> + } +``` + +Some operations are currently missing in the MLIR complex dialect that we would +want to use here, such as powi and the hyperbolic trigonometry functions. +For the missing operations we call libm directly where possible; for powi +we provide an implementation in the flang runtime. + +## Lowering + +The MLIR complex dialect supports lowering either by emitting calls to the +complex functions in libm (ComplexToLibm), or through lowering to the standard +dialect (ComplexToStandard). However, as MLIR has no target awareness, the +lowering to libm functions suffers from ABI incompatibilities on some platforms. +As such the custom lowering to the standard dialect is used. This may be +something to revisit in future if performance could be improved by using the +libm functions. 
+ +Similarly to the numerical lowering through the math dialect, certain MLIR +optimisations could violate the precise floating point model, so when that is +requested lowering manually emits calls to libm, rather than going through the +MLIR complex dialect. + +The ComplexToStandard conversion does still call into libm for some floating +point math operations; however, these don't have the same ABI issues as the +complex libm functions. + +[1]: https://discourse.llvm.org/t/rfc-change-lowering-of-fortran-math-intrinsics/63971 From 5f87a892a7bed9cb0599573b9aaf387bc1df9c14 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 4 Nov 2022 15:37:18 +0100 Subject: [PATCH 248/516] [clang] Fix assert in SemaEXpr.cpp --- clang/lib/Sema/SemaExpr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 51bd0004a389a..40f6af8d8e8ca 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5943,7 +5943,7 @@ struct ImmediateCallVisitor : public RecursiveASTVisitor { bool VisitBlockDecl(BlockDecl *B) { return false; } bool VisitCompoundStmt(CompoundStmt *B) { - assert("Unexpected Compound statement in default parameter or initializer"); + assert(false && "Unexpected Compound statement in default parameter or initializer"); return false; } From 710e34e1360710275662ad5b0bdc394570fb26d5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 2 Nov 2022 17:10:20 -0400 Subject: [PATCH 249/516] [VectorCombine] move load safety checks to helper function; NFC These checks can be re-used with other potential transforms such as a load of a subvector-insert. 
--- .../Transforms/Vectorize/VectorCombine.cpp | 45 +++++++++++-------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index a21add2d47256..bac72b8fa8ebb 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -127,6 +127,27 @@ class VectorCombine { }; } // namespace +static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) { + // Do not widen load if atomic/volatile or under asan/hwasan/memtag/tsan. + // The widened load may load data from dirty regions or create data races + // non-existent in the source. + if (!Load || !Load->isSimple() || !Load->hasOneUse() || + Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) || + mustSuppressSpeculation(*Load)) + return false; + + // We are potentially transforming byte-sized (8-bit) memory accesses, so make + // sure we have all of our type-based constraints in place for this target. + Type *ScalarTy = Load->getType()->getScalarType(); + uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); + unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth(); + if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 || + ScalarSize % 8 != 0) + return false; + + return true; +} + bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // Match insert into fixed vector of scalar value. // TODO: Handle non-zero insert index. @@ -142,35 +163,22 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { if (!HasExtract) X = Scalar; - // Match source value as load of scalar or vector. - // Do not vectorize scalar load (widening) if atomic/volatile or under - // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions - // or create data races non-existent in the source. 
auto *Load = dyn_cast(X); - if (!Load || !Load->isSimple() || !Load->hasOneUse() || - Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) || - mustSuppressSpeculation(*Load)) + if (!canWidenLoad(Load, TTI)) return false; - const DataLayout &DL = I.getModule()->getDataLayout(); - Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); - assert(isa(SrcPtr->getType()) && "Expected a pointer type"); - - unsigned AS = Load->getPointerAddressSpace(); - - // We are potentially transforming byte-sized (8-bit) memory accesses, so make - // sure we have all of our type-based constraints in place for this target. Type *ScalarTy = Scalar->getType(); uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth(); - if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 || - ScalarSize % 8 != 0) - return false; // Check safety of replacing the scalar load with a larger vector load. // We use minimal alignment (maximum flexibility) because we only care about // the dereferenceable region. When calculating cost and creating a new op, // we may use a larger value based on alignment attributes. + const DataLayout &DL = I.getModule()->getDataLayout(); + Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); + assert(isa(SrcPtr->getType()) && "Expected a pointer type"); + unsigned MinVecNumElts = MinVectorSize / ScalarSize; auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); unsigned OffsetEltIndex = 0; @@ -215,6 +223,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // Use the greater of the alignment on the load or its source pointer. 
Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment); Type *LoadTy = Load->getType(); + unsigned AS = Load->getPointerAddressSpace(); InstructionCost OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); From d9c52c31a0fee22414bb4024ee5b203fe86ef5bb Mon Sep 17 00:00:00 2001 From: Karthik Senthil Date: Fri, 4 Nov 2022 10:33:45 -0400 Subject: [PATCH 250/516] [LV][IVDescriptors] Fix recurrence identity element for FMin and FMax reductions For a min and max reduction idioms, the identity (i.e. neutral) element should be datatype's highest and lowest possible values respectively. Current implementation in IVDescriptors incorrectly returns -Inf for FMin reduction and +Inf for FMax reduction. This patch fixes this bug which was causing incorrect reduction computation results in loops vectorized by LV. Differential Revision: https://reviews.llvm.org/D137220 --- llvm/lib/Analysis/IVDescriptors.cpp | 8 +- .../AArch64/scalable-reduction-inloop-cond.ll | 2 +- .../LoopVectorize/reduction-inloop-cond.ll | 2 +- llvm/unittests/Analysis/IVDescriptorsTest.cpp | 104 ++++++++++++++++++ 4 files changed, 112 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index e42512b41aa66..c76155832bce3 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -1111,9 +1111,13 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp, return ConstantInt::get(Tp, APInt::getSignedMinValue(Tp->getIntegerBitWidth())); case RecurKind::FMin: - return ConstantFP::getInfinity(Tp, true); + assert((FMF.noNaNs() && FMF.noSignedZeros()) && + "nnan, nsz is expected to be set for FP min reduction."); + return ConstantFP::getInfinity(Tp, false /*Negative*/); case RecurKind::FMax: - return ConstantFP::getInfinity(Tp, false); + assert((FMF.noNaNs() && FMF.noSignedZeros()) && + "nnan, nsz is expected to be 
set for FP max reduction."); + return ConstantFP::getInfinity(Tp, true /*Negative*/); case RecurKind::SelectICmp: case RecurKind::SelectFCmp: return getRecurrenceStartValue(); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll index 091b117c182ac..9ae930514e613 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -117,7 +117,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to * ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0nxv4f32(* [[TMP11]], i32 4, [[TMP8]], poison) -; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP8]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0xFFF0000000000000, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP8]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0x7FF0000000000000, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP12]]) ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index 8be98ebf087d5..ff3a071fc507a 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -168,7 +168,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: 
pred.load.continue6: ; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> +; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> ; CHECK-NEXT: [[TMP27:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP26]]) ; CHECK-NEXT: [[TMP28]] = call fast float @llvm.minnum.f32(float [[TMP27]], float [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/unittests/Analysis/IVDescriptorsTest.cpp b/llvm/unittests/Analysis/IVDescriptorsTest.cpp index e7948db10ae66..fd9a5a801042c 100644 --- a/llvm/unittests/Analysis/IVDescriptorsTest.cpp +++ b/llvm/unittests/Analysis/IVDescriptorsTest.cpp @@ -203,3 +203,107 @@ TEST(IVDescriptorsTest, LoopWithPtrToInt) { EXPECT_TRUE(IsInductionPHI); }); } + +// This tests that correct identity value is returned for a RecurrenceDescriptor +// that describes FMin reduction idiom. +TEST(IVDescriptorsTest, FMinRednIdentity) { + // Parse the module. 
+ LLVMContext Context; + + std::unique_ptr M = parseIR(Context, + R"(define float @foo(float* %A, i64 %ub) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %fmin = phi float [ 1.000000e+00, %entry ], [ %fmin.next, %for.body ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %i + %ld = load float, float* %arrayidx + %fmin.cmp = fcmp nnan nsz olt float %fmin, %ld + %fmin.next = select nnan nsz i1 %fmin.cmp, float %fmin, float %ld + %i.next = add nsw i64 %i, 1 + %cmp = icmp slt i64 %i.next, %ub + br i1 %cmp, label %for.body, label %for.end + +for.end: + %fmin.lcssa = phi float [ %fmin.next, %for.body ] + ret float %fmin.lcssa +})"); + + runWithLoopInfoAndSE( + *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + Function::iterator FI = F.begin(); + // First basic block is entry - skip it. + BasicBlock *Header = &*(++FI); + assert(Header->getName() == "for.body"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + BasicBlock::iterator BBI = Header->begin(); + assert((&*BBI)->getName() == "i"); + ++BBI; + PHINode *Phi = dyn_cast(&*BBI); + assert(Phi->getName() == "fmin"); + RecurrenceDescriptor Rdx; + bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx); + EXPECT_TRUE(IsRdxPhi); + RecurKind Kind = Rdx.getRecurrenceKind(); + EXPECT_EQ(Kind, RecurKind::FMin); + Type *Ty = Phi->getType(); + Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags()); + // Identity value for FP min reduction is +Inf. + EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, false /*Negative*/)); + }); +} + +// This tests that correct identity value is returned for a RecurrenceDescriptor +// that describes FMax reduction idiom. +TEST(IVDescriptorsTest, FMaxRednIdentity) { + // Parse the module. 
+ LLVMContext Context; + + std::unique_ptr M = parseIR(Context, + R"(define float @foo(float* %A, i64 %ub) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %fmax = phi float [ 1.000000e+00, %entry ], [ %fmax.next, %for.body ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %i + %ld = load float, float* %arrayidx + %fmax.cmp = fcmp nnan nsz ogt float %fmax, %ld + %fmax.next = select nnan nsz i1 %fmax.cmp, float %fmax, float %ld + %i.next = add nsw i64 %i, 1 + %cmp = icmp slt i64 %i.next, %ub + br i1 %cmp, label %for.body, label %for.end + +for.end: + %fmax.lcssa = phi float [ %fmax.next, %for.body ] + ret float %fmax.lcssa +})"); + + runWithLoopInfoAndSE( + *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + Function::iterator FI = F.begin(); + // First basic block is entry - skip it. + BasicBlock *Header = &*(++FI); + assert(Header->getName() == "for.body"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + BasicBlock::iterator BBI = Header->begin(); + assert((&*BBI)->getName() == "i"); + ++BBI; + PHINode *Phi = dyn_cast(&*BBI); + assert(Phi->getName() == "fmax"); + RecurrenceDescriptor Rdx; + bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx); + EXPECT_TRUE(IsRdxPhi); + RecurKind Kind = Rdx.getRecurrenceKind(); + EXPECT_EQ(Kind, RecurKind::FMax); + Type *Ty = Phi->getType(); + Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags()); + // Identity value for FP max reduction is -Inf. 
+ EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, true /*Negative*/)); + }); +} From 6875ac69279a3a02fab382a2c8d121558ecbfa91 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 4 Nov 2022 15:45:06 +0100 Subject: [PATCH 251/516] [clang] Remove an incorrect assert Compound statements can appear in default arguments but should not be visited in ImmediateCallVisitor as they are not subexpressions --- clang/lib/Sema/SemaExpr.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 40f6af8d8e8ca..ec67a6b6f28e7 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5943,7 +5943,6 @@ struct ImmediateCallVisitor : public RecursiveASTVisitor { bool VisitBlockDecl(BlockDecl *B) { return false; } bool VisitCompoundStmt(CompoundStmt *B) { - assert(false && "Unexpected Compound statement in default parameter or initializer"); return false; } From 901e5a7539e6ac8ebe15f721fd23e2c47f161769 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 4 Nov 2022 11:52:38 +0000 Subject: [PATCH 252/516] Precommit test for redundant mvn instructions --- .../illegal-floating-point-vector-compares.ll | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll new file mode 100644 index 0000000000000..6d2f75b86011e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +; All tests are doing unordered vector comparisons on vectors larger than a +; Neon vector. 
+ +define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) { +; CHECK-LABEL: unordered_floating_point_compare_on_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %a_cmp = fcmp ule <8 x float> %a_vec, zeroinitializer + %cmp_result = bitcast <8 x i1> %a_cmp to i8 + %all_zero = icmp eq i8 %cmp_result, 0 + ret i1 %all_zero +} + +define i1 @unordered_floating_point_compare_on_v16f32(<16 x float> %a_vec) { +; CHECK-LABEL: unordered_floating_point_compare_on_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 +; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mvn v3.16b, v3.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %a_cmp = fcmp ule <16 x float> %a_vec, zeroinitializer + %cmp_result = bitcast <16 x i1> %a_cmp to i16 + %all_zero = icmp eq i16 %cmp_result, 0 + ret i1 %all_zero +} + +define i1 @unordered_floating_point_compare_on_v32f32(<32 x float> %a_vec) { +; CHECK-LABEL: unordered_floating_point_compare_on_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v7.4s, v7.4s, #0.0 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: fcmgt v6.4s, v6.4s, #0.0 +; CHECK-NEXT: fcmgt v5.4s, v5.4s, #0.0 +; CHECK-NEXT: fcmgt v4.4s, v4.4s, #0.0 +; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0 +; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0 +; CHECK-NEXT: 
fcmgt v1.4s, v1.4s, #0.0 +; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mvn v7.16b, v7.16b +; CHECK-NEXT: mvn v6.16b, v6.16b +; CHECK-NEXT: mvn v5.16b, v5.16b +; CHECK-NEXT: mvn v4.16b, v4.16b +; CHECK-NEXT: mvn v3.16b, v3.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h +; CHECK-NEXT: uzp1 v4.8h, v4.8h, v5.8h +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v6.16b +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: bic w0, w9, w8 +; CHECK-NEXT: ret + %a_cmp = fcmp ule <32 x float> %a_vec, zeroinitializer + %cmp_result = bitcast <32 x i1> %a_cmp to i32 + %all_zero = icmp eq i32 %cmp_result, 0 + ret i1 %all_zero +} From 777f03479941f32b0f2b6aa447af1e469dfa9d5b Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sat, 29 Oct 2022 21:04:35 +0200 Subject: [PATCH 253/516] [libc++] inline more functions into basic_string This removes a lot of boilerplate. 
Reviewed By: ldionne, #libc, EricWF Spies: EricWF, libcxx-commits Differential Revision: https://reviews.llvm.org/D137025 --- libcxx/include/string | 277 ++++++++++++++---------------------------- 1 file changed, 91 insertions(+), 186 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 726bba3156f6e..592c63466be74 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1010,9 +1010,12 @@ public: } #ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& operator=(basic_string&& __str) - _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator=(basic_string&& __str) + _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)) { + __move_assign(__str, integral_constant()); + return *this; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator=(initializer_list __il) {return assign(__il.begin(), __il.size());} #endif @@ -1065,7 +1068,17 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT {return __is_long() ? __get_long_size() : __get_short_size();} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type length() const _NOEXCEPT {return size();} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT { + size_type __m = __alloc_traits::max_size(__alloc()); + if (__m <= std::numeric_limits::max() / 2) { + return __m - __alignment; + } else { + bool __uses_lsb = __endian_factor == 2; + return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment; + } + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type capacity() const _NOEXCEPT { return (__is_long() ? 
__get_long_cap() : static_cast(__min_cap)) - 1; } @@ -1093,9 +1106,15 @@ public: _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT {return size() == 0;} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - const_reference operator[](size_type __pos) const _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __pos) _NOEXCEPT; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference operator[](size_type __pos) const _NOEXCEPT { + _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); + return *(data() + __pos); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __pos) _NOEXCEPT { + _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); + return *(__get_pointer() + __pos); + } _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference at(size_type __n) const; _LIBCPP_CONSTEXPR_SINCE_CXX20 reference at(size_type __n); @@ -1130,8 +1149,9 @@ public: basic_string& operator+=(initializer_list __il) { return append(__il); } #endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& append(const basic_string& __str); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& append(const basic_string& __str) { + return append(__str.data(), __str.size()); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1189,10 +1209,26 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 void push_back(value_type __c); _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void pop_back(); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const _NOEXCEPT; + + 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); + return *__get_pointer(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); + return *data(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); + return *(__get_pointer() + size() - 1); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); + return *(data() + size() - 1); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1244,8 +1280,10 @@ public: basic_string& assign(initializer_list __il) {return assign(__il.begin(), __il.size());} #endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& insert(size_type __pos1, const basic_string& __str); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + insert(size_type __pos1, const basic_string& __str) { + return insert(__pos1, __str.data(), __str.size()); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1271,8 +1309,16 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, const value_type* __s); _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, size_type __n, value_type __c); _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator insert(const_iterator __pos, value_type __c); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - iterator insert(const_iterator __pos, size_type __n, value_type __c); + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator + insert(const_iterator __pos, size_type __n, value_type __c) { + 
_LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, + "string::insert(iterator, n, value) called with an iterator not referring to this string"); + difference_type __p = __pos - begin(); + insert(static_cast(__p), __n, __c); + return begin() + __p; + } + template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 __enable_if_t @@ -1301,8 +1347,10 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator erase(const_iterator __first, const_iterator __last); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(size_type __pos1, size_type __n1, const basic_string& __str) { + return replace(__pos1, __n1, __str.data(), __str.size()); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1326,8 +1374,12 @@ public: basic_string& replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2); _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& replace(size_type __pos, size_type __n1, const value_type* __s); _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& replace(size_type __pos, size_type __n1, size_type __n2, value_type __c); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, const basic_string& __str); + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, const basic_string& __str) { + return replace( + static_cast(__i1 - begin()), static_cast(__i2 - __i1), __str.data(), __str.size()); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1338,12 +1390,21 @@ public: > replace(const_iterator __i1, const_iterator __i2, const _Tp& __t) { __self_view __sv = __t; return replace(__i1 - begin(), __i2 - __i1, __sv); } - 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n) { + return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s, __n); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, const value_type* __s) { + return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c) { + return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __n, __c); + } + template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 __enable_if_t @@ -1800,8 +1861,9 @@ private: template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_no_alias(const value_type* __s, size_type __n); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - void __erase_to_end(size_type __pos); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __erase_to_end(size_type __pos) { + __null_terminate_at(std::__to_address(__get_pointer()), __pos); + } // __erase_external_with_move is invoked for erase() invocations where // `n ~= npos`, likely requiring memory moves on the string data. 
@@ -2511,17 +2573,6 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr } } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::operator=(basic_string&& __str) - _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)) -{ - __move_assign(__str, integral_constant()); - return *this; -} - #endif template @@ -2762,14 +2813,6 @@ basic_string<_CharT, _Traits, _Allocator>::append( return *this; } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str) -{ - return append(__str.data(), __str.size()); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<_CharT, _Traits, _Allocator>& @@ -2927,14 +2970,6 @@ basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _Forward } } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_string& __str) -{ - return insert(__pos1, __str.data(), __str.size()); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<_CharT, _Traits, _Allocator>& @@ -3005,19 +3040,6 @@ basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, value_ty return begin() + static_cast(__ip); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::iterator -basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, size_type __n, value_type __c) -{ - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, - "string::insert(iterator, n, value) called with an iterator not" - " referring to this string"); - difference_type __p = __pos - begin(); - insert(static_cast(__p), __n, __c); - return begin() + __p; -} - // replace template @@ -3119,14 +3141,6 @@ basic_string<_CharT, _Traits, 
_Allocator>::replace(const_iterator __i1, const_it return replace(__i1, __i2, __temp); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const basic_string& __str) -{ - return replace(__pos1, __n1, __str.data(), __str.size()); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<_CharT, _Traits, _Allocator>& @@ -3166,39 +3180,6 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __ return replace(__pos, __n1, __s, traits_type::length(__s)); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const basic_string& __str) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), - __str.data(), __str.size()); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s, __n); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __n, __c); -} - // erase // 'externally instantiated' erase() implementation, called when __n != npos. 
@@ -3295,14 +3276,6 @@ basic_string<_CharT, _Traits, _Allocator>::clear() _NOEXCEPT } } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -void -basic_string<_CharT, _Traits, _Allocator>::__erase_to_end(size_type __pos) -{ - __null_terminate_at(std::__to_address(__get_pointer()), __pos); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 void @@ -3326,20 +3299,6 @@ basic_string<_CharT, _Traits, _Allocator>::__resize_default_init(size_type __n) __erase_to_end(__n); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT -{ - size_type __m = __alloc_traits::max_size(__alloc()); - if (__m <= std::numeric_limits::max() / 2) { - return __m - __alignment; - } else { - bool __uses_lsb = __endian_factor == 2; - return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment; - } -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 void @@ -3436,24 +3395,6 @@ basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target std::__debug_db_invalidate_all(this); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::const_reference -basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) const _NOEXCEPT -{ - _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); - return *(data() + __pos); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::reference -basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) _NOEXCEPT -{ - _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); - return *(__get_pointer() + __pos); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 typename basic_string<_CharT, _Traits, _Allocator>::const_reference @@ -3474,42 +3415,6 @@ basic_string<_CharT, _Traits, _Allocator>::at(size_type __n) return (*this)[__n]; } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, 
_Traits, _Allocator>::reference -basic_string<_CharT, _Traits, _Allocator>::front() _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); - return *__get_pointer(); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::const_reference -basic_string<_CharT, _Traits, _Allocator>::front() const _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); - return *data(); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::reference -basic_string<_CharT, _Traits, _Allocator>::back() _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); - return *(__get_pointer() + size() - 1); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::const_reference -basic_string<_CharT, _Traits, _Allocator>::back() const _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); - return *(data() + size() - 1); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 typename basic_string<_CharT, _Traits, _Allocator>::size_type From 2f211f865decbed393f7c08415398ed840b3a9e3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 4 Nov 2022 16:57:42 +0100 Subject: [PATCH 254/516] [LVI] Improve debug message (NFC) --- llvm/lib/Analysis/LazyValueInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 6f61091a41b98..fe08e512a81f7 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -701,7 +701,8 @@ Optional LazyValueInfoImpl::solveBlockValueNonLocal( // to overdefined. 
if (Result.isOverdefined()) { LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined because of pred (non local).\n"); + << "' - overdefined because of pred '" + << Pred->getName() << "' (non local).\n"); return Result; } } From dd1b1d44503288f98b298f1ec8b374137d2812e2 Mon Sep 17 00:00:00 2001 From: rkayaith Date: Thu, 20 Oct 2022 00:27:09 -0400 Subject: [PATCH 255/516] [mlir][python] Allow adding to existing pass manager This adds a `PassManager.add` method which adds pipeline elements to the pass manager. This allows for progressively building up a pipeline from python without string manipulation. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D137344 --- mlir/lib/Bindings/Python/Pass.cpp | 14 ++++++++++++++ .../python/integration/dialects/linalg/opsrun.py | 16 +++++++++++----- mlir/test/python/pass_manager.py | 14 ++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp index 13f1cfa3536ac..cb3c1586eb996 100644 --- a/mlir/lib/Bindings/Python/Pass.cpp +++ b/mlir/lib/Bindings/Python/Pass.cpp @@ -100,6 +100,20 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { "Parse a textual pass-pipeline and return a top-level PassManager " "that can be applied on a Module. Throw a ValueError if the pipeline " "can't be parsed") + .def( + "add", + [](PyPassManager &passManager, const std::string &pipeline) { + PyPrintAccumulator errorMsg; + MlirLogicalResult status = mlirOpPassManagerAddPipeline( + mlirPassManagerGetAsOpPassManager(passManager.get()), + mlirStringRefCreate(pipeline.data(), pipeline.size()), + errorMsg.getCallback(), errorMsg.getUserData()); + if (mlirLogicalResultIsFailure(status)) + throw SetPyError(PyExc_ValueError, std::string(errorMsg.join())); + }, + py::arg("pipeline"), + "Add textual pipeline elements to the pass manager. 
Throws a " + "ValueError if the pipeline can't be parsed.") .def( "run", [](PyPassManager &passManager, PyModule &module) { diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py index 2075ecfc21d01..585741ae9336d 100644 --- a/mlir/test/python/integration/dialects/linalg/opsrun.py +++ b/mlir/test/python/integration/dialects/linalg/opsrun.py @@ -191,11 +191,17 @@ def transform(module, boilerplate): ops = module.operation.regions[0].blocks[0].operations mod = Module.parse("\n".join([str(op) for op in ops]) + boilerplate) - pm = PassManager.parse( - "builtin.module(func.func(convert-linalg-to-loops, lower-affine, " + - "convert-math-to-llvm, convert-scf-to-cf, arith-expand, memref-expand), " - + "convert-vector-to-llvm, convert-memref-to-llvm, convert-func-to-llvm," + - "reconcile-unrealized-casts)") + pm = PassManager('builtin.module') + pm.add("func.func(convert-linalg-to-loops)") + pm.add("func.func(lower-affine)") + pm.add("func.func(convert-math-to-llvm)") + pm.add("func.func(convert-scf-to-cf)") + pm.add("func.func(arith-expand)") + pm.add("func.func(memref-expand)") + pm.add("convert-vector-to-llvm") + pm.add("convert-memref-to-llvm") + pm.add("convert-func-to-llvm") + pm.add("reconcile-unrealized-casts") pm.run(mod) return mod diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py index 04e325e13e785..492c7e09ec5ae 100644 --- a/mlir/test/python/pass_manager.py +++ b/mlir/test/python/pass_manager.py @@ -75,6 +75,20 @@ def testParseFail(): log("Exception not produced") run(testParseFail) +# Check that adding to a pass manager works +# CHECK-LABEL: TEST: testAdd +@run +def testAdd(): + pm = PassManager("any", Context()) + # CHECK: pm: 'any()' + log(f"pm: '{pm}'") + # CHECK: pm: 'any(cse)' + pm.add("cse") + log(f"pm: '{pm}'") + # CHECK: pm: 'any(cse,cse)' + pm.add("cse") + log(f"pm: '{pm}'") + # Verify failure on incorrect level of nesting. 
# CHECK-LABEL: TEST: testInvalidNesting From 6ace52e5e49cff6664fc301fa4985fc28c88f26f Mon Sep 17 00:00:00 2001 From: Hubert Tong Date: Fri, 4 Nov 2022 12:26:53 -0400 Subject: [PATCH 256/516] [Driver][AIX] Change UNSUPPORTED to XFAIL system-aix Update https://reviews.llvm.org/rGc14df228ff3c to check the host versus the target thereby allowing XFAIL to be used in case the issue is resolved in the future. --- clang/test/Driver/response-file-errs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/response-file-errs.c b/clang/test/Driver/response-file-errs.c index 64eb3208a836c..0fd03ed08c04a 100644 --- a/clang/test/Driver/response-file-errs.c +++ b/clang/test/Driver/response-file-errs.c @@ -1,5 +1,5 @@ // AIX reacts on opening directory differently than other systems. -// UNSUPPORTED: aix +// XFAIL: system-aix // If response file does not exist, '@file; directive remains unexpanded in // command line. From a032b47e7e7792d57a26c0dcb38ecd12c28a0248 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Fri, 4 Nov 2022 04:23:32 +0000 Subject: [PATCH 257/516] [mlir][linalg] Fix neutral element value for minf/maxf The neutral element for minf/maxf should be respectively +inf and -inf. Bug reported by @Hardcode84. 
Differential Revision: https://reviews.llvm.org/D137385 --- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 6 +++--- .../Dialect/Linalg/transform-op-split-reduction.mlir | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 04cbed0c4e135..ccf7cdc4aadbc 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -1073,10 +1073,10 @@ Optional getNeutralElement(Operation *op) { return b.getFloatAttr(resultType, llvm::APFloat(semantic, 1)); if (isa(op)) return b.getFloatAttr(resultType, - llvm::APFloat::getLargest(semantic, true)); + llvm::APFloat::getInf(semantic, /*Negative=*/true)); if (isa(op)) - return b.getFloatAttr(resultType, - llvm::APFloat::getLargest(semantic, true)); + return b.getFloatAttr( + resultType, llvm::APFloat::getInf(semantic, /*Negative=*/false)); return Attribute(); } if (isa(op)) diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir index ee5f98bc2ce01..cb7a92198c04d 100644 --- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir @@ -112,7 +112,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> // CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)> // CHECK-LABEL: func @generic_split_3d -// CHECK-DAG: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32 +// CHECK-DAG: %[[ID:.*]] = arith.constant 0xFF800000 : f32 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32> // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32> // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32> @@ -238,7 +238,7 @@ func.func 
@generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %3 = arith.addf %arg0, %arg1 : f32 - %4 = arith.maxf %3, %arg2 : f32 + %4 = arith.minf %3, %arg2 : f32 linalg.yield %4 : f32 } -> tensor<5x2xf32> return %0 : tensor<5x2xf32> @@ -250,7 +250,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> // CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)> // CHECK-LABEL: func @generic_split_3d -// CHECK-DAG: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32 +// CHECK-DAG: %[[ID:.*]] = arith.constant 0x7F800000 : f32 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32> // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32> // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32> @@ -258,12 +258,12 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) { // CHECK: arith.addf -// CHECK: arith.maxf +// CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2x4xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) { -// CHECK: arith.maxf +// CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2xf32> // CHECK: return %[[R]] : tensor<5x2xf32> From 8eab182bf2b7683fb5637a01b7664b802c759c2f Mon Sep 17 00:00:00 2001 From: 
Shilei Tian Date: Fri, 4 Nov 2022 12:40:45 -0400 Subject: [PATCH 258/516] [RFC][OpenMP][Doc] No backward compatible for libomptarget and plugins Now we state that backward compatibility is not guaranteed in the document. Reviewed By: JonChesterfield, dreachem Differential Revision: https://reviews.llvm.org/D133277 --- openmp/docs/SupportAndFAQ.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index dc1ad83902d28..8bf0ea56a7ae7 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -413,3 +413,17 @@ linkable device image. clang++ openmp.cpp -fopenmp --offload-arch=sm_80 -c clang++ cuda.cu --offload-new-driver --offload-arch=sm_80 -fgpu-rdc -c clang++ openmp.o cuda.o --offload-link -o app + +Q: Are libomptarget and plugins backward compatible? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +No. libomptarget and plugins are now built as LLVM libraries starting from LLVM +15. Because LLVM libraries are not backward compatible, libomptarget and plugins +are not as well. Given that fact, the interfaces between 1) the Clang compiler +and libomptarget, 2) the Clang compiler and device runtime library, and +3) libomptarget and plugins are not guaranteed to be compatible with an earlier +version. Users are responsible for ensuring compatibility when not using the +Clang compiler and runtime libraries from the same build. Nevertheless, in order +to better support third-party libraries and toolchains that depend on existing +libomptarget entry points, contributors are discouraged from making +modifications to them. From c8fab80d64119ffcde78f0e9a70c5babb0da0467 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Nov 2022 15:09:48 -0700 Subject: [PATCH 259/516] [mlir][Transform] NFC - Add custom builders for some useful transforms. 
Differential Revision: https://reviews.llvm.org/D137443 --- .../Linalg/TransformOps/LinalgTransformOps.h | 6 + .../Linalg/TransformOps/LinalgTransformOps.td | 45 +++++++ .../mlir/Dialect/Transform/IR/TransformOps.td | 11 ++ .../TransformOps/LinalgTransformOps.cpp | 118 +++++++++++++++++- .../Transform/IR/TransformInterfaces.cpp | 6 +- .../lib/Dialect/Transform/IR/TransformOps.cpp | 24 ++++ 6 files changed, 206 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h index f7952db7e2a23..2583875e2d0ea 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h @@ -20,6 +20,12 @@ namespace linalg { class GenericOp; class LinalgOp; } // namespace linalg + +namespace transform { +// Types needed for builders. +struct TileSizesSpec {}; +struct NumThreadsSpec {}; +} // namespace transform } // namespace mlir //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 6cb14acb1b089..347def6c9d1b5 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -112,6 +112,10 @@ def FuseIntoContainingOp : [TransformMappingAlloc, TransformMappingWrite]>:$fused_op); let assemblyFormat = "$producer_op `into` $containing_op attr-dict"; + + let builders = [ + OpBuilder<(ins "Value":$producerOp, "Value":$containingOp)> + ]; } def GeneralizeOp : Op":$opNames)> + ]; + let assemblyFormat = [{ (`ops` `{` $ops^ `}`)? (`interface` `{` $interface^ `}`)? 
@@ -600,6 +608,15 @@ def SplitReductionOp : Op:$innerParallel, + CArg<"bool", "false">:$useScalingAlgorithm, + CArg<"bool", "false">:$useAlloc)> + ]; + let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::linalg::LinalgOp target, @@ -818,6 +835,30 @@ def TileToForeachThreadOp : OptionalAttr:$thread_dim_mapping); let results = (outs PDL_Operation:$foreach_thread_op, PDL_Operation:$tiled_op); + + let builders = [ + OpBuilder<(ins "Value":$target, + "ArrayRef":$staticTileSizes, + CArg<"::mlir::transform::TileSizesSpec", + "::mlir::transform::TileSizesSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + OpBuilder<(ins "Value":$target, + "ArrayRef":$mixedTileSizes, + CArg<"::mlir::transform::TileSizesSpec", + "::mlir::transform::TileSizesSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + OpBuilder<(ins "Value":$target, + "ArrayRef":$staticNumThreads, + CArg<"::mlir::transform::NumThreadsSpec", + "::mlir::transform::NumThreadsSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + OpBuilder<(ins "Value":$target, + "ArrayRef":$mixedNumThreads, + CArg<"::mlir::transform::NumThreadsSpec", + "::mlir::transform::NumThreadsSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + ]; + let assemblyFormat = [{ $target oilist( `num_threads` custom($num_threads, @@ -943,6 +984,10 @@ def VectorizeOp : Op:$vectorizePadding)> + ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::Operation *target, diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 4b1bb02ee757a..42f8d5cb27698 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -253,6 +253,11 @@ def SplitHandlesOp : TransformDialectOp<"split_handles", let arguments = (ins TransformTypeInterface:$handle, I64Attr:$num_result_handles); let results = (outs Variadic:$results); + + let builders = [ 
+ OpBuilder<(ins "Value":$handle, "int64_t":$numResultHandles)> + ]; + let assemblyFormat = [{ $handle `in` `[` $num_result_handles `]` attr-dict `:` functional-type(operands, results) @@ -305,6 +310,12 @@ def PrintOp : TransformDialectOp<"print", let arguments = (ins Optional:$target, OptionalAttr:$name); let results = (outs); + + let builders = [ + OpBuilder<(ins CArg<"StringRef", "StringRef()">:$name)>, + OpBuilder<(ins "Value":$target, CArg<"StringRef", "StringRef()">:$name)> + ]; + let assemblyFormat = "$target attr-dict (`:` type($target)^)?"; } diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index c8a3cb6946e3d..a35dd14483963 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -254,6 +254,14 @@ LogicalResult transform::FuseOp::verify() { // FuseIntoContainingOp //===----------------------------------------------------------------------===// +void transform::FuseIntoContainingOp::build(OpBuilder &builder, + OperationState &result, + Value producerOp, + Value containingOp) { + result.addOperands({producerOp, containingOp}); + result.addTypes(pdl::OperationType::get(builder.getContext())); +} + /// Find the first "extract" user of `producerOp` and tile it right before its /// use. The tiled op is fused under the `containingOp`. /// Return this fused op on success or nullptr if anything fails. 
@@ -628,6 +636,14 @@ LogicalResult transform::InterchangeOp::verify() { // MatchOp //===---------------------------------------------------------------------===// +void transform::MatchOp::build(OpBuilder &builder, OperationState &result, + Value target, ArrayRef opNames) { + result.addOperands(target); + result.addAttribute(MatchOp::getOpsAttrName(result.name), + builder.getStrArrayAttr(opNames)); + result.addTypes(pdl::OperationType::get(builder.getContext())); +} + DiagnosedSilenceableFailure transform::MatchOp::apply(transform::TransformResults &results, transform::TransformState &state) { @@ -1069,6 +1085,34 @@ LogicalResult SplitOp::verify() { // SplitReductionOp //===----------------------------------------------------------------------===// +void transform::SplitReductionOp::build( + OpBuilder &builder, OperationState &result, Value target, + int64_t splitFactor, int64_t insertSplitDimension, bool innerParallel, + bool useScalingAlgorithm, bool useAlloc) { + MLIRContext *ctx = builder.getContext(); + result.addOperands(target); + result.addAttribute(SplitReductionOp::getSplitFactorAttrName(result.name), + builder.getI64IntegerAttr(splitFactor)); + result.addAttribute( + SplitReductionOp::getInsertSplitDimensionAttrName(result.name), + builder.getI64IntegerAttr(insertSplitDimension)); + if (innerParallel) { + result.addAttribute(SplitReductionOp::getInnerParallelAttrName(result.name), + builder.getUnitAttr()); + } + if (useScalingAlgorithm) { + result.addAttribute( + SplitReductionOp::getUseScalingAlgorithmAttrName(result.name), + builder.getUnitAttr()); + } + if (useAlloc) { + result.addAttribute(SplitReductionOp::getUseAllocAttrName(result.name), + builder.getUnitAttr()); + } + auto resultType = pdl::OperationType::get(ctx); + result.addTypes({resultType, resultType, resultType, resultType}); +} + DiagnosedSilenceableFailure transform::SplitReductionOp::applyToOne(linalg::LinalgOp target, SmallVectorImpl &results, @@ -1277,13 +1321,75 @@ void 
transform::TileOp::getEffects( // TileToForeachThreadOp //===----------------------------------------------------------------------===// +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef staticTileSizes, transform::TileSizesSpec, + ArrayRef threadDimMapping) { + return build(builder, result, target, + getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)), + TileSizesSpec(), threadDimMapping); +} + +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef mixedTileSizes, transform::TileSizesSpec, + ArrayRef threadDimMapping) { + SmallVector staticTileSizes; + SmallVector dynamicTileSizes; + dispatchIndexOpFoldResults(mixedTileSizes, dynamicTileSizes, staticTileSizes, + ShapedType::kDynamicSize); + // Call the default builder which sets up the proper operands segment sizes + // attributes for multiple variadic operands. In the absence of this, horrible + // bugs ensue. 
+ MLIRContext *ctx = builder.getContext(); + auto operationType = pdl::OperationType::get(ctx); + auto staticTileSizesAttr = builder.getI64ArrayAttr(staticTileSizes); + ArrayAttr threadDimMappingAttr; + if (!threadDimMapping.empty()) + threadDimMappingAttr = builder.getI64ArrayAttr(threadDimMapping); + build(builder, result, TypeRange{operationType, operationType}, target, + /*numThreads=*/ValueRange{}, dynamicTileSizes, + /*staticNumThreads=*/ArrayAttr(), staticTileSizesAttr, + threadDimMappingAttr); +} + +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef staticNumThreads, transform::NumThreadsSpec, + ArrayRef threadDimMapping) { + return build(builder, result, target, + getAsOpFoldResult(builder.getI64ArrayAttr(staticNumThreads)), + NumThreadsSpec(), threadDimMapping); +} + +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef mixedNumThreads, transform::NumThreadsSpec, + ArrayRef threadDimMapping) { + SmallVector staticNumThreads; + SmallVector dynamicNumThreads; + dispatchIndexOpFoldResults(mixedNumThreads, dynamicNumThreads, + staticNumThreads, ShapedType::kDynamicSize); + // Call the default builder which sets up the proper operands segment sizes + // attributes for multiple variadic operands. In the absence of this, horrible + // bugs ensue. 
+ MLIRContext *ctx = builder.getContext(); + auto operationType = pdl::OperationType::get(ctx); + auto staticNumThreadsAttr = builder.getI64ArrayAttr(staticNumThreads); + ArrayAttr threadDimMappingAttr; + if (!threadDimMapping.empty()) + threadDimMappingAttr = builder.getI64ArrayAttr(threadDimMapping); + build(builder, result, TypeRange{operationType, operationType}, target, + dynamicNumThreads, /*tileSizes=*/ValueRange{}, staticNumThreadsAttr, + /*staticTileSizes=*/ArrayAttr(), threadDimMappingAttr); +} + DiagnosedSilenceableFailure transform::tileToForeachThreadOpImpl( RewriterBase &rewriter, transform::TransformState &state, TransformOpInterface transformOp, ArrayRef targets, ArrayRef mixedNumThreads, ArrayRef mixedTileSizes, Optional threadDimMapping, SmallVector &tileOps, SmallVector &tiledOps) { - if (targets.empty()) return DiagnosedSilenceableFailure(success()); @@ -1573,6 +1679,16 @@ void transform::TileToScfForOp::getEffects( // VectorizeOp //===----------------------------------------------------------------------===// +void transform::VectorizeOp::build(OpBuilder &builder, OperationState &result, + Value target, bool vectorizePadding) { + result.addOperands(target); + if (vectorizePadding) { + result.addAttribute(VectorizeOp::getVectorizePaddingAttrName(result.name), + builder.getUnitAttr()); + } + result.addTypes(pdl::OperationType::get(builder.getContext())); +} + namespace { /// This is an helper only to call vectorize via a pattern inside of /// VectorizeOp::applyToOne. 
diff --git a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp index 5d84b7b0a6030..9b136cccbe6f1 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp @@ -314,11 +314,11 @@ transform::TransformResults::TransformResults(unsigned numSegments) { void transform::TransformResults::set(OpResult value, ArrayRef ops) { - unsigned position = value.getResultNumber(); - assert(position < segments.size() && + int64_t position = value.getResultNumber(); + assert(position < static_cast(segments.size()) && "setting results for a non-existent handle"); assert(segments[position].data() == nullptr && "results already set"); - unsigned start = operations.size(); + int64_t start = operations.size(); llvm::append_range(operations, ops); segments[position] = makeArrayRef(operations).drop_front(start); } diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 2be1bea91fbe9..5fe2d465ee51a 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -472,6 +472,16 @@ OpFoldResult transform::MergeHandlesOp::fold(ArrayRef operands) { // SplitHandlesOp //===----------------------------------------------------------------------===// +void transform::SplitHandlesOp::build(OpBuilder &builder, + OperationState &result, Value target, + int64_t numResultHandles) { + result.addOperands(target); + result.addAttribute(SplitHandlesOp::getNumResultHandlesAttrName(result.name), + builder.getI64IntegerAttr(numResultHandles)); + auto pdlOpType = pdl::OperationType::get(builder.getContext()); + result.addTypes(SmallVector(numResultHandles, pdlOpType)); +} + DiagnosedSilenceableFailure transform::SplitHandlesOp::apply(transform::TransformResults &results, transform::TransformState &state) { @@ -812,6 +822,20 @@ LogicalResult transform::WithPDLPatternsOp::verify() { 
// PrintOp //===----------------------------------------------------------------------===// +void transform::PrintOp::build(OpBuilder &builder, OperationState &result, + StringRef name) { + if (!name.empty()) { + result.addAttribute(PrintOp::getNameAttrName(result.name), + builder.getStringAttr(name)); + } +} + +void transform::PrintOp::build(OpBuilder &builder, OperationState &result, + Value target, StringRef name) { + result.addOperands({target}); + build(builder, result, name); +} + DiagnosedSilenceableFailure transform::PrintOp::apply(transform::TransformResults &results, transform::TransformState &state) { From 254bf678d49185026d1b17c6345106f9f3fe58b2 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 4 Nov 2022 09:59:40 -0700 Subject: [PATCH 260/516] [RISCV] Define custom-N opcodes Differential Revision: https://reviews.llvm.org/D137355 --- llvm/lib/Target/RISCV/RISCVInstrFormats.td | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 18b31f85bfdb4..f8de46ea7cfff 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -125,12 +125,14 @@ def lookupRISCVOpcodeByName : SearchIndex { } def OPC_LOAD : RISCVOpcode<"LOAD", 0b0000011>; def OPC_LOAD_FP : RISCVOpcode<"LOAD_FP", 0b0000111>; +def OPC_CUSTOM_0 : RISCVOpcode<"CUSTOM_0", 0b0001011>; def OPC_MISC_MEM : RISCVOpcode<"MISC_MEM", 0b0001111>; def OPC_OP_IMM : RISCVOpcode<"OP_IMM", 0b0010011>; def OPC_AUIPC : RISCVOpcode<"AUIPC", 0b0010111>; def OPC_OP_IMM_32 : RISCVOpcode<"OP_IMM_32", 0b0011011>; def OPC_STORE : RISCVOpcode<"STORE", 0b0100011>; def OPC_STORE_FP : RISCVOpcode<"STORE_FP", 0b0100111>; +def OPC_CUSTOM_1 : RISCVOpcode<"CUSTOM_1", 0b0101011>; def OPC_AMO : RISCVOpcode<"AMO", 0b0101111>; def OPC_OP : RISCVOpcode<"OP", 0b0110011>; def OPC_LUI : RISCVOpcode<"LUI", 0b0110111>; @@ -141,10 +143,12 @@ def OPC_NMSUB : RISCVOpcode<"NMSUB", 0b1001011>; def
OPC_NMADD : RISCVOpcode<"NMADD", 0b1001111>; def OPC_OP_FP : RISCVOpcode<"OP_FP", 0b1010011>; def OPC_OP_V : RISCVOpcode<"OP_V", 0b1010111>; +def OPC_CUSTOM_2 : RISCVOpcode<"CUSTOM_2", 0b1011011>; def OPC_BRANCH : RISCVOpcode<"BRANCH", 0b1100011>; def OPC_JALR : RISCVOpcode<"JALR", 0b1100111>; def OPC_JAL : RISCVOpcode<"JAL", 0b1101111>; def OPC_SYSTEM : RISCVOpcode<"SYSTEM", 0b1110011>; +def OPC_CUSTOM_3 : RISCVOpcode<"CUSTOM_3", 0b1111011>; class RVInst pattern, InstFormat format> From a7fa5febaa43d860cbd6a4061f239b283c4d8032 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Wed, 2 Nov 2022 14:25:48 -0700 Subject: [PATCH 261/516] [Test] Fix CHECK typo. Differential Revision: https://reviews.llvm.org/D137287 --- .../ignore-conflict/ignore-conflict.cpp | 2 +- clang/test/CodeGenCoroutines/pr56329.cpp | 2 +- .../omp_with_loop_pragma_instr_profile.c | 2 +- .../OpenMP/target_parallel_for_codegen.cpp | 24 +++++++++---------- .../Lower/ext-proc-as-actual-argument-1.f90 | 2 +- .../Lower/ext-proc-as-actual-argument-2.f90 | 2 +- flang/test/Lower/fail_image.f90 | 2 +- .../SymbolFile/NativePDB/inline_sites.test | 14 +++++------ llvm/test/CodeGen/PowerPC/livevars-crash2.mir | 2 +- llvm/test/CodeGen/PowerPC/phi-eliminate.mir | 2 +- llvm/test/CodeGen/PowerPC/vsx-args.ll | 2 +- .../InstrRef/pick-vphi-in-shifting-loop.mir | 2 +- .../MC/WebAssembly/tag-section-decoding.ll | 2 +- .../type-checker-emit-after-unreachable.s | 18 +++++++------- llvm/test/Transforms/Coroutines/coro-debug.ll | 2 +- .../recursion-compression-pseudoprobe.test | 2 +- .../MathToSPIRV/math-to-gl-spirv.mlir | 2 +- mlir/test/Dialect/MemRef/canonicalize.mlir | 2 +- mlir/test/Dialect/Tensor/canonicalize.mlir | 10 ++++---- mlir/test/python/ir/affine_expr.py | 2 +- 20 files changed, 49 insertions(+), 49 deletions(-) diff --git a/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp index 
8791dd952319d..8a483049c8cd5 100644 --- a/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp +++ b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp @@ -1,4 +1,4 @@ class MyType {}; // CHECK: #include // CHECK-NEXT: #include -// CEHCK-NEXT: class MyType {}; +// CHECK-NEXT: class MyType {}; diff --git a/clang/test/CodeGenCoroutines/pr56329.cpp b/clang/test/CodeGenCoroutines/pr56329.cpp index 2e9a1a244e218..855755d05f844 100644 --- a/clang/test/CodeGenCoroutines/pr56329.cpp +++ b/clang/test/CodeGenCoroutines/pr56329.cpp @@ -116,4 +116,4 @@ Task Outer() { // CHECK: musttail call // CHECK: musttail call // CHECK-NEXT: ret void -// CHEKC-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c b/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c index 9667f9cc549d3..25bfbb7c815de 100644 --- a/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c +++ b/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c @@ -17,6 +17,6 @@ void sub(double *restrict a, double *restrict b, int n) { // CHECK: omp.precond.then: // CHECK-NEXT: call void @llvm.instrprof.increment( // CHECK: cond.true: -// CEHCK-NEXT: call void @llvm.instrprof.increment( +// CHECK-NEXT: call void @llvm.instrprof.increment( // CHECK: omp.inner.for.body: // CHECK-NEXT: call void @llvm.instrprof.increment( diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index f6946165cfd3b..8f14afb70c674 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -112,18 +112,18 @@ int foo(int n) { a += 1; } - // CEHCK-32: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 - // CEHCK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) - // CEHCK-32: [[FPBPGEP:%.+]] = getelementptr 
inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 - // CEHCK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) - // CEHCK-32: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 - // CEHCK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) - // CEHCK-64: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 - // CEHCK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) - // CEHCK-64: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 - // CEHCK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) - // CEHCK-64: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 - // CEHCK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) + // CHECK-32: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) + // CHECK-32: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) + // CHECK-32: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) + // CHECK-64: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) + // CHECK-64: 
[[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) + // CHECK-64: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) int lin = 12; #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) nowait for (unsigned long long it = 2000; it >= 600; it-=400) { diff --git a/flang/test/Lower/ext-proc-as-actual-argument-1.f90 b/flang/test/Lower/ext-proc-as-actual-argument-1.f90 index e121a82a3e021..36751b82a1cad 100644 --- a/flang/test/Lower/ext-proc-as-actual-argument-1.f90 +++ b/flang/test/Lower/ext-proc-as-actual-argument-1.f90 @@ -13,7 +13,7 @@ ! CHECK: return ! CHECK-LABEL: func @_QPext_func( -! CEHCK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { +! CHECK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { program m external :: ext_func call sub(ext_func) diff --git a/flang/test/Lower/ext-proc-as-actual-argument-2.f90 b/flang/test/Lower/ext-proc-as-actual-argument-2.f90 index 8c04e8617f49e..5b6d0aad308a7 100644 --- a/flang/test/Lower/ext-proc-as-actual-argument-2.f90 +++ b/flang/test/Lower/ext-proc-as-actual-argument-2.f90 @@ -13,7 +13,7 @@ ! CHECK: return ! CHECK-LABEL: func @_QPext_func( -! CEHCK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { +! CHECK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { program m external :: ext_func call sub(ext_func) diff --git a/flang/test/Lower/fail_image.f90 b/flang/test/Lower/fail_image.f90 index 9da162faca248..f2b54e7c2d560 100644 --- a/flang/test/Lower/fail_image.f90 +++ b/flang/test/Lower/fail_image.f90 @@ -14,7 +14,7 @@ subroutine fail_image_test(fail) ! CHECK: ^[[BB2]]: ! CHECK-NEXT: br ^[[BB3:.*]] ! 
CHECK-NEXT: ^[[BB3]] -! CEHCK-NEXT: return +! CHECK-NEXT: return return end subroutine ! CHECK-LABEL: func private @_FortranAFailImageStatement() -> none attributes {fir.runtime} diff --git a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test index cf82f8b493568..e8319341084af 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test +++ b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test @@ -56,7 +56,7 @@ # https://github.com/llvm/llvm-project/issues/53575. Fix them after resolving # the issue. -# CEHCK-LABEL: (lldb) image lookup -a 0x140001003 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001003 -v # CHECK: Summary: {{.*}}`main + 3 at a.cpp:2 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) @@ -64,7 +64,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argc", type = "int", valid ranges = , location = [0x0000000140001000, 0x000000014000102d) -> DW_OP_reg26 XMM9 # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX -# CEHCK-LABEL: (lldb) image lookup -a 0x140001004 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001004 -v # CHECK: Summary: {{.*}}`main + 4 [inlined] Namespace1::foo at a.h:5 # CHECK-NEXT: {{.*}}`main + 4 at a.cpp:3 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) @@ -77,7 +77,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x140001010 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001010 -v # CHECK: Summary: 
{{.*}}`main + 16 [inlined] Namespace1::foo + 12 at a.h:7 # CHECK-NEXT: {{.*}}`main + 4 at a.cpp:3 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) @@ -90,7 +90,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x14000101c -v +# CHECK-LABEL: (lldb) image lookup -a 0x14000101c -v # CHECK: Summary: {{.*}}`main + 28 [inlined] Class1::bar at b.h:5 # CHECK-NEXT: {{.*}}`main + 28 [inlined] Namespace1::foo + 24 at a.h:9 # CHECK-NEXT: {{.*}}`main + 4 at a.cpp:3 @@ -107,7 +107,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x14000102a -v +# CHECK-LABEL: (lldb) image lookup -a 0x14000102a -v # CHECK: Summary: {{.*}}`main + 42 [inlined] Namespace2::Class2::func at c.h:5 # CHECK-NEXT: {{.*}}`main + 42 [inlined] Class1::bar + 14 at b.h:7 # CHECK-NEXT: {{.*}}`main + 28 [inlined] Namespace1::foo + 24 at a.h:9 @@ -127,7 +127,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x140001039 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001039 -v # CHECK: Summary: {{.*}}`main + 57 at a.cpp:3 # CHECK: Function: id = 
{{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) @@ -135,7 +135,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x140001044 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001044 -v # CHECK: Summary: {{.*}}`main + 68 [inlined] Namespace1::foo + 5 at a.h:8 # CHECK-NEXT: {{.*}}`main + 63 at a.cpp:3 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) diff --git a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir index 2f1022f13a4b1..e397567f4e582 100644 --- a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir +++ b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir @@ -191,7 +191,7 @@ body: | ; CHECK: %1:g8rc_and_g8rc_nox0 = COPY killed %12 ; CHECK: %5:gprc = LBZ 0, %1 :: (load (s8) from %ir.0) ; CHECK: %6:crrc = CMPWI killed %5, 0 - ; CEHCK: %7:crbitrc = COPY killed %6.sub_eq + ; CHECK: %7:crbitrc = COPY killed %6.sub_eq ; CHECK: %2:g8rc = nuw ADDI8 %1, 1 ; CHECK: STD %2, 0, %4 :: (store (s64) into %ir.p) ; CHECK: %8:gprc = LBZ 1, %1 :: (load (s8) from %ir.incdec.ptr) diff --git a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir index a8eacc201a365..a79f2586850fe 100644 --- a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir +++ b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir @@ -158,7 +158,7 @@ body: | ; CHECK: %21:gprc = SUBF killed %20, killed %8 ; CHECK: %22:crrc = CMPLWI %21, 10 ; CHECK: %23:gprc = ISEL %15, %14, killed %22.sub_lt - ; CEHCK: %24:gprc = ADD4 killed %23, killed %21 + ; CHECK: %24:gprc = ADD4 killed %23, killed %21 ; CHECK: %25:g8rc_and_g8rc_nox0 
= STBU killed %24, -1, undef %0:g8rc_and_g8rc_nox0 :: (store (s8) into %ir.7) ; CHECK: %26:gprc = DIVW %19, %9 ; CHECK: %57:gprc = COPY killed %26 diff --git a/llvm/test/CodeGen/PowerPC/vsx-args.ll b/llvm/test/CodeGen/PowerPC/vsx-args.ll index 8cd2dbfde2795..e8137fa6bebb5 100644 --- a/llvm/test/CodeGen/PowerPC/vsx-args.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-args.ll @@ -28,7 +28,7 @@ entry: ; CHECK-FISL: vmr 4, 3 ; CHECK-FISL: lxvd2x 35, 1, 3 ; CHECK-FISL: 3, 144 -; CHCEK-FISL: stxvd2x 36, 1, 3 +; CHECK-FISL: stxvd2x 36, 1, 3 ; CHECK-FISL: vmr 4, 2 ; CHECK-FISL: bl sv diff --git a/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir b/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir index d652da6088d5d..f43ae955bcd33 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir @@ -19,7 +19,7 @@ # CHECK: DBG_VALUE $rcx # CHECK-NEXT: $rdx = MOV64rr killed $rcx # CHECK-LABEL: bb.5: -# CHEKC-NOT: DBG_VALUE +# CHECK-NOT: DBG_VALUE --- | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/MC/WebAssembly/tag-section-decoding.ll b/llvm/test/MC/WebAssembly/tag-section-decoding.ll index 605cae8e9f147..d872047f27414 100644 --- a/llvm/test/MC/WebAssembly/tag-section-decoding.ll +++ b/llvm/test/MC/WebAssembly/tag-section-decoding.ll @@ -339,4 +339,4 @@ define i32 @test_throw(i8* %p) { ; number with which its LEB128 and ULEB128 encodings are different, because its ; 7th least significant bit is not 0. 
; CHECK: - Type: TAG -; CHEC-NEXT: TagTypes: [ 64 ] +; CHECK-NEXT: TagTypes: [ 64 ] diff --git a/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s b/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s index 4c88384616f55..806901d01dce7 100644 --- a/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s +++ b/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s @@ -16,12 +16,12 @@ foo: end_function # CHECK-LABEL: foo: -# CHEKC-NEXT: .functype foo () -> (i32) -# CHEKC-NEXT: i32.const 1 -# CHEKC-NEXT: if i32 -# CHEKC-NEXT: i32.const 2 -# CHEKC-NEXT: return -# CHEKC-NEXT: else -# CHEKC-NEXT: i32.const 3 -# CHEKC-NEXT: end_if -# CHEKC-NEXT: end_function +# CHECK-NEXT: .functype foo () -> (i32) +# CHECK-NEXT: i32.const 1 +# CHECK-NEXT: if i32 +# CHECK-NEXT: i32.const 2 +# CHECK-NEXT: return +# CHECK-NEXT: else +# CHECK-NEXT: i32.const 3 +# CHECK-NEXT: end_if +# CHECK-NEXT: end_function diff --git a/llvm/test/Transforms/Coroutines/coro-debug.ll b/llvm/test/Transforms/Coroutines/coro-debug.ll index 396cf5472d64e..abb9edc33de8b 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug.ll @@ -189,7 +189,7 @@ attributes #7 = { noduplicate } ; CHECK: %[[ALLOCATED_STORAGE:.+]] = invoke i8* @allocate() ; CHECK-NEXT: to label %[[NORMAL_DEST:.+]] unwind ; CHECK: [[NORMAL_DEST]] -; CHEKC-NEXT: call void @llvm.dbg.declare(metadata i8* %[[ALLOCATED_STORAGE]] +; CHECK-NEXT: call void @llvm.dbg.declare(metadata i8* %[[ALLOCATED_STORAGE]] ; CHECK: %[[CALLBR_RES:.+]] = callbr i32 asm ; CHECK-NEXT: to label %[[DEFAULT_DEST:.+]] [label ; CHECK: [[DEFAULT_DEST]]: diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test index 1d11a85b47883..c673028584c0d 100644 --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ 
-91,7 +91,7 @@ ; CHECK: 1: 4 ; CHECK: 2: 3 ; CHECK: 3: 1 -; CEHCK: 5: 4 fb:4 +; CHECK: 5: 4 fb:4 ; CHECK: 6: 1 fa:1 ; CHECK !CFGChecksum: 563022570642068 ; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2 diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir index df39036277ee6..a29b18b6812b9 100644 --- a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir +++ b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir @@ -148,7 +148,7 @@ func.func @powf_scalar(%lhs: f32, %rhs: f32) -> f32 { // CHECK-LABEL: @powf_vector func.func @powf_vector(%lhs: vector<4xf32>, %rhs: vector<4xf32>) -> vector<4xf32> { // CHECK: spirv.FOrdLessThan - // CHEKC: spirv.GL.FAbs + // CHECK: spirv.GL.FAbs // CHECK: spirv.GL.Pow %{{.*}}: vector<4xf32> // CHECK: spirv.FNegate // CHECK: spirv.Select diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index 5a418022800cf..3fd4ae1c81c96 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -57,7 +57,7 @@ func.func @subview_canonicalize(%arg0 : memref, %arg1 : index, // CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] // CHECK-SAME: : memref to memref<4x1x?xf32 // CHECK: %[[RESULT:.+]] = memref.cast %[[SUBVIEW]] -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 9cddfd88735ab..99e31c7c35964 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -353,7 +353,7 @@ func.func @slice_canonicalize(%arg0 : tensor, %arg1 : index, // CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] // CHECK-SAME: : tensor to tensor<4x1x?xf32> // CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]] -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- @@ -372,7 +372,7 @@ func.func 
@rank_reducing_slice_canonicalize(%arg0 : tensor, %arg1 : i // CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] // CHECK-SAME: : tensor to tensor<4x?xf32> // CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]] -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- @@ -467,7 +467,7 @@ func.func @slice_to_insert_slice_canonicalize(%arg0 : tensor, %arg1 : // CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]] // CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] // CHECK-SAME: : tensor<4x1x?xf32> into tensor -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- @@ -486,7 +486,7 @@ func.func @rank_reducing_insert_slice_canonicalize(%arg0 : tensor, %arg // CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[CAST]] // CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] // CHECK-SAME: : tensor<4x?xf32> into tensor -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- @@ -509,7 +509,7 @@ func.func @rank_reducing_slice_to_insert_slice_canonicalize(%arg0 : tensor into tensor -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- diff --git a/mlir/test/python/ir/affine_expr.py b/mlir/test/python/ir/affine_expr.py index 9854b496fe460..6a3a6fcc65e1b 100644 --- a/mlir/test/python/ir/affine_expr.py +++ b/mlir/test/python/ir/affine_expr.py @@ -116,7 +116,7 @@ def testAffineExprSymbol(): # CHECK: 2 print(s2.position) - # CHEKC: s2 + # CHECK: s2 print(s2) assert s1 == s11 From b5e93e390c06602cb03121ad875e2855253e2937 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Mon, 17 Oct 2022 17:45:15 +0000 Subject: [PATCH 262/516] [flang] Add -f[no-]honor-nans and -menable-no-nans Only add the option processing and store the result. No attributes are added to FIR yet. 
Differential Revision: https://reviews.llvm.org/D137325 --- clang/include/clang/Driver/Options.td | 6 +++--- clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++++++ flang/include/flang/Frontend/LangOptions.def | 2 ++ flang/lib/Frontend/CompilerInvocation.cpp | 6 ++++++ flang/test/Driver/driver-help.f90 | 1 + flang/test/Driver/flang_fp_opts.f90 | 7 ++++++- flang/test/Driver/frontend-forwarding.f90 | 2 ++ 7 files changed, 30 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 65cd6e85da4e1..bc0b89190af0b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5444,9 +5444,6 @@ def mframe_pointer_EQ : Joined<["-"], "mframe-pointer=">, HelpText<"Specify which frame pointers to retain.">, Values<"all,non-leaf,none">, NormalizedValuesScope<"CodeGenOptions::FramePointerKind">, NormalizedValues<["All", "NonLeaf", "None"]>, MarshallingInfoEnum, "None">; -def menable_no_nans : Flag<["-"], "menable-no-nans">, - HelpText<"Allow optimization to assume there are no NaNs.">, - MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; def mreassociate : Flag<["-"], "mreassociate">, HelpText<"Allow reassociation transformations for floating-point instructions">, MarshallingInfoFlag>, ImpliedByAnyOf<[funsafe_math_optimizations.KeyPath]>; @@ -6057,6 +6054,9 @@ def split_dwarf_output : Separate<["-"], "split-dwarf-output">, let Flags = [CC1Option, FC1Option, NoDriverOption] in { +def menable_no_nans : Flag<["-"], "menable-no-nans">, + HelpText<"Allow optimization to assume there are no NaNs.">, + MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; def menable_no_infinities : Flag<["-"], "menable-no-infs">, HelpText<"Allow optimization to assume there are no infinities.">, MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 
14547b6f409aa..f66a024c7ffa2 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -84,6 +84,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) { StringRef FPContract; bool HonorINFs = true; + bool HonorNaNs = true; if (const Arg *A = Args.getLastArg(options::OPT_ffp_contract)) { const StringRef Val = A->getValue(); @@ -115,6 +116,12 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, case options::OPT_fno_honor_infinities: HonorINFs = false; break; + case options::OPT_fhonor_nans: + HonorNaNs = true; + break; + case options::OPT_fno_honor_nans: + HonorNaNs = false; + break; } // If we handled this option claim it @@ -126,6 +133,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (!HonorINFs) CmdArgs.push_back("-menable-no-infs"); + + if (!HonorNaNs) + CmdArgs.push_back("-menable-no-nans"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index 96e9ea63f1964..6bafc0613d3bd 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -23,6 +23,8 @@ ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Off) ///< FP Contract Mode (off/ /// Permit floating point optimization without regard to infinities LANGOPT(NoHonorInfs, 1, false) +/// Permit floating point optimization without regard to NaN +LANGOPT(NoHonorNaNs, 1, false) #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 252e1a7e697a5..cce97caea5159 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -702,6 +702,12 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.NoHonorInfs = true; } + if (const llvm::opt::Arg *a = + 
args.getLastArg(clang::driver::options::OPT_menable_no_nans)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.NoHonorNaNs = true; + } + return true; } diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 068985bc6d56e..587c0ec2ffcb5 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -131,6 +131,7 @@ ! HELP-FC1-NEXT: -I Add directory to the end of the list of include search paths ! HELP-FC1-NEXT: -load Load the named plugin (dynamic shared object) ! HELP-FC1-NEXT: -menable-no-infs Allow optimization to assume there are no infinities. +! HELP-FC1-NEXT: -menable-no-nans Allow optimization to assume there are no NaNs. ! HELP-FC1-NEXT: -mllvm Additional arguments to forward to LLVM's option processing ! HELP-FC1-NEXT: -mmlir Additional arguments to forward to MLIR's option processing ! HELP-FC1-NEXT: -module-dir Put MODULE files in diff --git a/flang/test/Driver/flang_fp_opts.f90 b/flang/test/Driver/flang_fp_opts.f90 index 272ef8495b957..79f1ba796a4a0 100644 --- a/flang/test/Driver/flang_fp_opts.f90 +++ b/flang/test/Driver/flang_fp_opts.f90 @@ -1,5 +1,10 @@ ! Test for handling of floating point options within the frontend driver -! RUN: %flang_fc1 -ffp-contract=fast -menable-no-infs %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 \ +! RUN: -ffp-contract=fast \ +! RUN: -menable-no-infs \ +! RUN: -menable-no-nans \ +! RUN: %s 2>&1 | FileCheck %s ! CHECK: ffp-contract= is not currently implemented ! CHECK: menable-no-infs is not currently implemented +! CHECK: menable-no-nans is not currently implemented diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index 0c3fd6c48ee20..1160509a500c3 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -10,6 +10,7 @@ ! RUN: -fconvert=little-endian \ ! RUN: -ffp-contract=fast \ ! RUN: -fno-honor-infinities \ +! RUN: -fno-honor-nans \ ! 
RUN: -mllvm -print-before-all\ ! RUN: -P \ ! RUN: | FileCheck %s @@ -22,5 +23,6 @@ ! CHECK: "-flarge-sizes" ! CHECK: "-ffp-contract=fast" ! CHECK: "-menable-no-infs" +! CHECK: "-menable-no-nans" ! CHECK: "-fconvert=little-endian" ! CHECK: "-mllvm" "-print-before-all" From 36b37a1ed561404a32a4b4b6e2bd92d915894a7c Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 18 Oct 2022 16:14:52 +0000 Subject: [PATCH 263/516] [flang] Add -f[no-]approx-func Only add the option processing and store the result. No attributes are added to FIR yet. Differential Revision: https://reviews.llvm.org/D137326 --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++++++ flang/include/flang/Frontend/LangOptions.def | 2 ++ flang/lib/Frontend/CompilerInvocation.cpp | 6 ++++++ flang/test/Driver/driver-help-hidden.f90 | 1 + flang/test/Driver/driver-help.f90 | 2 ++ flang/test/Driver/flang_fp_opts.f90 | 2 ++ flang/test/Driver/frontend-forwarding.f90 | 2 ++ 8 files changed, 26 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bc0b89190af0b..72aa93644865b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1892,7 +1892,7 @@ defm reciprocal_math : BoolFOption<"reciprocal-math", [funsafe_math_optimizations.KeyPath]>, NegFlag>; defm approx_func : BoolFOption<"approx-func", LangOpts<"ApproxFunc">, DefaultFalse, - PosFlag, NegFlag>; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index f66a024c7ffa2..0e82194999c5e 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -85,6 +85,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, StringRef FPContract; bool HonorINFs = true; bool HonorNaNs = true; + bool ApproxFunc = false; if (const Arg *A = Args.getLastArg(options::OPT_ffp_contract)) { const StringRef Val = A->getValue(); @@ 
-122,6 +123,12 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, case options::OPT_fno_honor_nans: HonorNaNs = false; break; + case options::OPT_fapprox_func: + ApproxFunc = true; + break; + case options::OPT_fno_approx_func: + ApproxFunc = false; + break; } // If we handled this option claim it @@ -136,6 +143,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (!HonorNaNs) CmdArgs.push_back("-menable-no-nans"); + + if (ApproxFunc) + CmdArgs.push_back("-fapprox-func"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index 6bafc0613d3bd..4a5c47f0da637 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -25,6 +25,8 @@ ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Off) ///< FP Contract Mode (off/ LANGOPT(NoHonorInfs, 1, false) /// Permit floating point optimization without regard to NaN LANGOPT(NoHonorNaNs, 1, false) +/// Allow math functions to be replaced with an approximately equivalent calculation +LANGOPT(ApproxFunc, 1, false) #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index cce97caea5159..dd461c740e3ba 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -708,6 +708,12 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.NoHonorNaNs = true; } + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_fapprox_func)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.ApproxFunc = true; + } + return true; } diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index d2dc82ea1b526..3c249e4f4c7d8 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ 
b/flang/test/Driver/driver-help-hidden.f90 @@ -22,6 +22,7 @@ ! CHECK-NEXT: -E Only run the preprocessor ! CHECK-NEXT: -falternative-parameter-statement ! CHECK-NEXT: Enable the old style PARAMETER statement +! CHECK-NEXT: -fapprox-func Allow certain math function calls to be replaced with an approximately equivalent calculation ! CHECK-NEXT: -fbackslash Specify that backslash in string introduces an escape character ! CHECK-NEXT: -fcolor-diagnostics Enable colors in diagnostics ! CHECK-NEXT: -fconvert= Set endian conversion of data for unformatted files diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 587c0ec2ffcb5..3ca3d065e2e30 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -22,6 +22,7 @@ ! HELP-NEXT: -E Only run the preprocessor ! HELP-NEXT: -falternative-parameter-statement ! HELP-NEXT: Enable the old style PARAMETER statement +! HELP-NEXT: -fapprox-func Allow certain math function calls to be replaced with an approximately equivalent calculation ! HELP-NEXT: -fbackslash Specify that backslash in string introduces an escape character ! HELP-NEXT: -fcolor-diagnostics Enable colors in diagnostics ! HELP-NEXT: -fconvert= Set endian conversion of data for unformatted files @@ -79,6 +80,7 @@ ! HELP-FC1-NEXT: -E Only run the preprocessor ! HELP-FC1-NEXT: -falternative-parameter-statement ! HELP-FC1-NEXT: Enable the old style PARAMETER statement +! HELP-FC1-NEXT: -fapprox-func Allow certain math function calls to be replaced with an approximately equivalent calculation ! HELP-FC1-NEXT: -fbackslash Specify that backslash in string introduces an escape character ! HELP-FC1-NEXT: -fcolor-diagnostics Enable colors in diagnostics ! 
HELP-FC1-NEXT: -fconvert= Set endian conversion of data for unformatted files diff --git a/flang/test/Driver/flang_fp_opts.f90 b/flang/test/Driver/flang_fp_opts.f90 index 79f1ba796a4a0..3b9d0f2d09794 100644 --- a/flang/test/Driver/flang_fp_opts.f90 +++ b/flang/test/Driver/flang_fp_opts.f90 @@ -4,7 +4,9 @@ ! RUN: -ffp-contract=fast \ ! RUN: -menable-no-infs \ ! RUN: -menable-no-nans \ +! RUN: -fapprox-func \ ! RUN: %s 2>&1 | FileCheck %s ! CHECK: ffp-contract= is not currently implemented ! CHECK: menable-no-infs is not currently implemented ! CHECK: menable-no-nans is not currently implemented +! CHECK: fapprox-func is not currently implemented diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index 1160509a500c3..ed0e89a69ca27 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -11,6 +11,7 @@ ! RUN: -ffp-contract=fast \ ! RUN: -fno-honor-infinities \ ! RUN: -fno-honor-nans \ +! RUN: -fapprox-func \ ! RUN: -mllvm -print-before-all\ ! RUN: -P \ ! RUN: | FileCheck %s @@ -24,5 +25,6 @@ ! CHECK: "-ffp-contract=fast" ! CHECK: "-menable-no-infs" ! CHECK: "-menable-no-nans" +! CHECK: "-fapprox-func" ! CHECK: "-fconvert=little-endian" ! CHECK: "-mllvm" "-print-before-all" From b5b8a8cfbe1ee3c2d3684dd62e7f0ddeeeb73273 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 18 Oct 2022 17:59:03 +0000 Subject: [PATCH 264/516] [flang] Add -f[no-]signed-zeros Only add the option processing and store the result. No attributes are added to FIR yet. 
Differential Revision: https://reviews.llvm.org/D137328 --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++++++ flang/include/flang/Frontend/LangOptions.def | 2 ++ flang/lib/Frontend/CompilerInvocation.cpp | 6 ++++++ flang/test/Driver/driver-help-hidden.f90 | 1 + flang/test/Driver/driver-help.f90 | 2 ++ flang/test/Driver/flang_fp_opts.f90 | 2 ++ flang/test/Driver/frontend-forwarding.f90 | 2 ++ 8 files changed, 26 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 72aa93644865b..99500918d8bf7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1902,7 +1902,7 @@ defm finite_math_only : BoolFOption<"finite-math-only", NegFlag>; defm signed_zeros : BoolFOption<"signed-zeros", LangOpts<"NoSignedZero">, DefaultFalse, - NegFlag, PosFlag>; def fhonor_nans : Flag<["-"], "fhonor-nans">, Group; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 0e82194999c5e..a5b47123b9bc4 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -86,6 +86,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, bool HonorINFs = true; bool HonorNaNs = true; bool ApproxFunc = false; + bool SignedZeros = true; if (const Arg *A = Args.getLastArg(options::OPT_ffp_contract)) { const StringRef Val = A->getValue(); @@ -129,6 +130,12 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, case options::OPT_fno_approx_func: ApproxFunc = false; break; + case options::OPT_fsigned_zeros: + SignedZeros = true; + break; + case options::OPT_fno_signed_zeros: + SignedZeros = false; + break; } // If we handled this option claim it @@ -146,6 +153,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (ApproxFunc) CmdArgs.push_back("-fapprox-func"); + + if (!SignedZeros) + 
CmdArgs.push_back("-fno-signed-zeros"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index 4a5c47f0da637..e357182f50757 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -27,6 +27,8 @@ LANGOPT(NoHonorInfs, 1, false) LANGOPT(NoHonorNaNs, 1, false) /// Allow math functions to be replaced with an approximately equivalent calculation LANGOPT(ApproxFunc, 1, false) +/// Allow optimizations that ignore the sign of floating point zeros +LANGOPT(NoSignedZeros, 1, false) #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index dd461c740e3ba..eeadb21c19233 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -714,6 +714,12 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.ApproxFunc = true; } + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_fno_signed_zeros)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.NoSignedZeros = true; + } + return true; } diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index 3c249e4f4c7d8..b3913e99d1387 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -45,6 +45,7 @@ ! CHECK-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE ! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler +! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! CHECK-NEXT: -fopenacc Enable OpenACC ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! 
CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 3ca3d065e2e30..1c48ec572443e 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -43,6 +43,7 @@ ! HELP-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE ! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! HELP-NEXT: -fno-integrated-as Disable the integrated assembler +! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-NEXT: -fopenacc Enable OpenACC ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages @@ -124,6 +125,7 @@ ! HELP-FC1-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE ! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager ! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode +! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-FC1-NEXT: -fopenacc Enable OpenACC ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages diff --git a/flang/test/Driver/flang_fp_opts.f90 b/flang/test/Driver/flang_fp_opts.f90 index 3b9d0f2d09794..a305cddef4d2f 100644 --- a/flang/test/Driver/flang_fp_opts.f90 +++ b/flang/test/Driver/flang_fp_opts.f90 @@ -5,8 +5,10 @@ ! RUN: -menable-no-infs \ ! RUN: -menable-no-nans \ ! RUN: -fapprox-func \ +! RUN: -fno-signed-zeros \ ! RUN: %s 2>&1 | FileCheck %s ! CHECK: ffp-contract= is not currently implemented ! CHECK: menable-no-infs is not currently implemented ! CHECK: menable-no-nans is not currently implemented ! 
CHECK: fapprox-func is not currently implemented +! CHECK: fno-signed-zeros is not currently implemented diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index ed0e89a69ca27..a00c45a9c154f 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -12,6 +12,7 @@ ! RUN: -fno-honor-infinities \ ! RUN: -fno-honor-nans \ ! RUN: -fapprox-func \ +! RUN: -fno-signed-zeros \ ! RUN: -mllvm -print-before-all\ ! RUN: -P \ ! RUN: | FileCheck %s @@ -26,5 +27,6 @@ ! CHECK: "-menable-no-infs" ! CHECK: "-menable-no-nans" ! CHECK: "-fapprox-func" +! CHECK: "-fno-signed-zeros" ! CHECK: "-fconvert=little-endian" ! CHECK: "-mllvm" "-print-before-all" From c4dc3c029416a25103d631e8dc5422f65c076376 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Wed, 19 Oct 2022 10:33:12 +0000 Subject: [PATCH 265/516] [flang] Add -f[no-]associative-math and -mreassociate Only add the option processing and store the result. No attributes are added to FIR yet. Clang only forwards -mreassociate if (AssociativeMath && !SignedZeros && !TrappingMath) Flang doesn't have -f[no-]trapping-math, so this part of the condition has been omitted. !TrappingMath is the default. 
Differential Revision: https://reviews.llvm.org/D137329 --- clang/include/clang/Driver/Options.td | 6 +++--- clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++++++ flang/include/flang/Frontend/LangOptions.def | 2 ++ flang/lib/Frontend/CompilerInvocation.cpp | 6 ++++++ flang/test/Driver/driver-help.f90 | 1 + flang/test/Driver/flang_fp_opts.f90 | 2 ++ flang/test/Driver/frontend-forwarding.f90 | 2 ++ 7 files changed, 26 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 99500918d8bf7..77c84396fa995 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5444,9 +5444,6 @@ def mframe_pointer_EQ : Joined<["-"], "mframe-pointer=">, HelpText<"Specify which frame pointers to retain.">, Values<"all,non-leaf,none">, NormalizedValuesScope<"CodeGenOptions::FramePointerKind">, NormalizedValues<["All", "NonLeaf", "None"]>, MarshallingInfoEnum, "None">; -def mreassociate : Flag<["-"], "mreassociate">, - HelpText<"Allow reassociation transformations for floating-point instructions">, - MarshallingInfoFlag>, ImpliedByAnyOf<[funsafe_math_optimizations.KeyPath]>; def mabi_EQ_ieeelongdouble : Flag<["-"], "mabi=ieeelongdouble">, HelpText<"Use IEEE 754 quadruple-precision for long double">, MarshallingInfoFlag>; @@ -6054,6 +6051,9 @@ def split_dwarf_output : Separate<["-"], "split-dwarf-output">, let Flags = [CC1Option, FC1Option, NoDriverOption] in { +def mreassociate : Flag<["-"], "mreassociate">, + HelpText<"Allow reassociation transformations for floating-point instructions">, + MarshallingInfoFlag>, ImpliedByAnyOf<[funsafe_math_optimizations.KeyPath]>; def menable_no_nans : Flag<["-"], "menable-no-nans">, HelpText<"Allow optimization to assume there are no NaNs.">, MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index a5b47123b9bc4..f51972773a553 100644 --- 
a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -87,6 +87,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, bool HonorNaNs = true; bool ApproxFunc = false; bool SignedZeros = true; + bool AssociativeMath = false; if (const Arg *A = Args.getLastArg(options::OPT_ffp_contract)) { const StringRef Val = A->getValue(); @@ -136,6 +137,12 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, case options::OPT_fno_signed_zeros: SignedZeros = false; break; + case options::OPT_fassociative_math: + AssociativeMath = true; + break; + case options::OPT_fno_associative_math: + AssociativeMath = false; + break; } // If we handled this option claim it @@ -156,6 +163,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (!SignedZeros) CmdArgs.push_back("-fno-signed-zeros"); + + if (AssociativeMath && !SignedZeros) + CmdArgs.push_back("-mreassociate"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index e357182f50757..059e3d18dfc8a 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -29,6 +29,8 @@ LANGOPT(NoHonorNaNs, 1, false) LANGOPT(ApproxFunc, 1, false) /// Allow optimizations that ignore the sign of floating point zeros LANGOPT(NoSignedZeros, 1, false) +/// Allow reassociation transformations for floating-point instructions +LANGOPT(AssociativeMath, 1, false) #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index eeadb21c19233..8c0bdcd185b6c 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -720,6 +720,12 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.NoSignedZeros = true; } + if (const llvm::opt::Arg *a = + 
args.getLastArg(clang::driver::options::OPT_mreassociate)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.AssociativeMath = true; + } + return true; } diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 1c48ec572443e..32b6f7615deca 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -140,6 +140,7 @@ ! HELP-FC1-NEXT: -mmlir Additional arguments to forward to MLIR's option processing ! HELP-FC1-NEXT: -module-dir Put MODULE files in ! HELP-FC1-NEXT: -module-suffix Use as the suffix for module files (the default value is `.mod`) +! HELP-FC1-NEXT: -mreassociate Allow reassociation transformations for floating-point instructions ! HELP-FC1-NEXT: -mrelocation-model ! HELP-FC1-NEXT: The relocation model to use ! HELP-FC1-NEXT: -nocpp Disable predefined and command line preprocessor macros diff --git a/flang/test/Driver/flang_fp_opts.f90 b/flang/test/Driver/flang_fp_opts.f90 index a305cddef4d2f..bbe886bcec878 100644 --- a/flang/test/Driver/flang_fp_opts.f90 +++ b/flang/test/Driver/flang_fp_opts.f90 @@ -6,9 +6,11 @@ ! RUN: -menable-no-nans \ ! RUN: -fapprox-func \ ! RUN: -fno-signed-zeros \ +! RUN: -mreassociate \ ! RUN: %s 2>&1 | FileCheck %s ! CHECK: ffp-contract= is not currently implemented ! CHECK: menable-no-infs is not currently implemented ! CHECK: menable-no-nans is not currently implemented ! CHECK: fapprox-func is not currently implemented ! CHECK: fno-signed-zeros is not currently implemented +! CHECK: mreassociate is not currently implemented diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index a00c45a9c154f..de3ed6ddbfca8 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -13,6 +13,7 @@ ! RUN: -fno-honor-nans \ ! RUN: -fapprox-func \ ! RUN: -fno-signed-zeros \ +! RUN: -fassociative-math \ ! RUN: -mllvm -print-before-all\ ! RUN: -P \ ! 
RUN: | FileCheck %s @@ -28,5 +29,6 @@ ! CHECK: "-menable-no-nans" ! CHECK: "-fapprox-func" ! CHECK: "-fno-signed-zeros" +! CHECK: "-mreassociate" ! CHECK: "-fconvert=little-endian" ! CHECK: "-mllvm" "-print-before-all" From d0d4b635786d510cd919cadbeb7e5e19983242cf Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Wed, 19 Oct 2022 10:51:31 +0000 Subject: [PATCH 266/516] [flang] add -f[no-]reciprocal-math Only add the option processing and store the result. No attributes are added to FIR yet. Differential Revision: https://reviews.llvm.org/D137330 --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++++++ flang/include/flang/Frontend/LangOptions.def | 2 ++ flang/lib/Frontend/CompilerInvocation.cpp | 6 ++++++ flang/test/Driver/driver-help-hidden.f90 | 1 + flang/test/Driver/driver-help.f90 | 2 ++ flang/test/Driver/flang_fp_opts.f90 | 2 ++ flang/test/Driver/frontend-forwarding.f90 | 2 ++ 8 files changed, 26 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 77c84396fa995..608840b2d3691 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1888,7 +1888,7 @@ def fassociative_math : Flag<["-"], "fassociative-math">, Group; def fno_associative_math : Flag<["-"], "fno-associative-math">, Group; defm reciprocal_math : BoolFOption<"reciprocal-math", LangOpts<"AllowRecip">, DefaultFalse, - PosFlag, NegFlag>; defm approx_func : BoolFOption<"approx-func", LangOpts<"ApproxFunc">, DefaultFalse, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index f51972773a553..43f6a82c33c49 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -88,6 +88,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, bool ApproxFunc = false; bool SignedZeros = true; bool AssociativeMath = false; + bool ReciprocalMath = false; if (const 
Arg *A = Args.getLastArg(options::OPT_ffp_contract)) { const StringRef Val = A->getValue(); @@ -143,6 +144,12 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, case options::OPT_fno_associative_math: AssociativeMath = false; break; + case options::OPT_freciprocal_math: + ReciprocalMath = true; + break; + case options::OPT_fno_reciprocal_math: + ReciprocalMath = false; + break; } // If we handled this option claim it @@ -166,6 +173,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (AssociativeMath && !SignedZeros) CmdArgs.push_back("-mreassociate"); + + if (ReciprocalMath) + CmdArgs.push_back("-freciprocal-math"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index 059e3d18dfc8a..024db6109d6a1 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -31,6 +31,8 @@ LANGOPT(ApproxFunc, 1, false) LANGOPT(NoSignedZeros, 1, false) /// Allow reassociation transformations for floating-point instructions LANGOPT(AssociativeMath, 1, false) +/// Allow division operations to be reassociated +LANGOPT(ReciprocalMath, 1, false) #undef LANGOPT #undef ENUM_LANGOPT diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 8c0bdcd185b6c..bb87ea285a265 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -726,6 +726,12 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.AssociativeMath = true; } + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_freciprocal_math)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.ReciprocalMath = true; + } + return true; } diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index b3913e99d1387..3bce2a57caa1a 
100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -48,6 +48,7 @@ ! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! CHECK-NEXT: -fopenacc Enable OpenACC ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. +! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated ! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! CHECK-NEXT: -help Display available options diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 32b6f7615deca..8d24deee0b1ad 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -46,6 +46,7 @@ ! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-NEXT: -fopenacc Enable OpenACC ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. +! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated ! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-NEXT: -help Display available options @@ -128,6 +129,7 @@ ! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-FC1-NEXT: -fopenacc Enable OpenACC ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. +! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! 
HELP-FC1-NEXT: -help Display available options diff --git a/flang/test/Driver/flang_fp_opts.f90 b/flang/test/Driver/flang_fp_opts.f90 index bbe886bcec878..0dc31f6f7649e 100644 --- a/flang/test/Driver/flang_fp_opts.f90 +++ b/flang/test/Driver/flang_fp_opts.f90 @@ -7,6 +7,7 @@ ! RUN: -fapprox-func \ ! RUN: -fno-signed-zeros \ ! RUN: -mreassociate \ +! RUN: -freciprocal-math \ ! RUN: %s 2>&1 | FileCheck %s ! CHECK: ffp-contract= is not currently implemented ! CHECK: menable-no-infs is not currently implemented @@ -14,3 +15,4 @@ ! CHECK: fapprox-func is not currently implemented ! CHECK: fno-signed-zeros is not currently implemented ! CHECK: mreassociate is not currently implemented +! CHECK: freciprocal-math is not currently implemented diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index de3ed6ddbfca8..9d1d7cb8d3c88 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -14,6 +14,7 @@ ! RUN: -fapprox-func \ ! RUN: -fno-signed-zeros \ ! RUN: -fassociative-math \ +! RUN: -freciprocal-math \ ! RUN: -mllvm -print-before-all\ ! RUN: -P \ ! RUN: | FileCheck %s @@ -30,5 +31,6 @@ ! CHECK: "-fapprox-func" ! CHECK: "-fno-signed-zeros" ! CHECK: "-mreassociate" +! CHECK: "-freciprocal-math" ! CHECK: "-fconvert=little-endian" ! CHECK: "-mllvm" "-print-before-all" From 589764a382642ae8374cfe21a6b10f839c8596da Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Fri, 4 Nov 2022 09:52:47 -0700 Subject: [PATCH 267/516] [mlir][math] Initial support for fastmath flag attributes for Math dialect. Added arith::FastMathAttr and ArithFastMathInterface support for Math dialect floating point operations. This change-set creates ArithCommon conversion utils that currently provide classes and methods to aid with arith::FastMathAttr conversion into LLVM::FastmathFlags. 
These utils are used in ArithToLLVM and MathToLLVM convertors, but may eventually be used by other converters that need to convert fast math attributes. Since Math dialect operations use arith::FastMathAttr, MathOps.td now has to include enum and attributes definitions from Arith dialect. To minimize the amount of TD code included from Arith dialect, I moved FastMathAttr definition into ArithBase.td. Differential Revision: https://reviews.llvm.org/D136312 --- .../ArithCommon/AttrToLLVMConverter.h | 81 +++++++++++++ .../mlir/Dialect/Arith/IR/ArithBase.td | 5 + .../include/mlir/Dialect/Arith/IR/ArithOps.td | 5 - mlir/include/mlir/Dialect/Math/IR/Math.h | 1 + mlir/include/mlir/Dialect/Math/IR/MathOps.td | 35 ++++-- .../ArithCommon/AttrToLLVMConverter.cpp | 38 ++++++ .../lib/Conversion/ArithCommon/CMakeLists.txt | 10 ++ .../Conversion/ArithToLLVM/ArithToLLVM.cpp | 113 ++++-------------- .../lib/Conversion/ArithToLLVM/CMakeLists.txt | 1 + mlir/lib/Conversion/CMakeLists.txt | 1 + mlir/lib/Conversion/MathToLLVM/CMakeLists.txt | 1 + mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp | 91 +++++++++----- .../Conversion/MathToLLVM/math-to-llvm.mlir | 101 ++++++++++++++++ mlir/test/Dialect/Math/ops.mlir | 14 +++ 14 files changed, 359 insertions(+), 138 deletions(-) create mode 100644 mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h create mode 100644 mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp create mode 100644 mlir/lib/Conversion/ArithCommon/CMakeLists.txt diff --git a/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h new file mode 100644 index 0000000000000..f27f7bb5975ec --- /dev/null +++ b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h @@ -0,0 +1,81 @@ +//===- AttrToLLVMConverter.h - Arith attributes conversion ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_ARITHCOMMON_ATTRTOLLVMCONVERTER_H +#define MLIR_CONVERSION_ARITHCOMMON_ATTRTOLLVMCONVERTER_H + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" + +//===----------------------------------------------------------------------===// +// Support for converting Arith FastMathFlags to LLVM FastmathFlags +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace arith { +// Map arithmetic fastmath enum values to LLVMIR enum values. +LLVM::FastmathFlags +convertArithFastMathFlagsToLLVM(arith::FastMathFlags arithFMF); + +// Create an LLVM fastmath attribute from a given arithmetic fastmath attribute. +LLVM::FastmathFlagsAttr +convertArithFastMathAttrToLLVM(arith::FastMathFlagsAttr fmfAttr); + +// Attribute converter that populates a NamedAttrList by removing the fastmath +// attribute from the source operation attributes, and replacing it with an +// equivalent LLVM fastmath attribute. +template +class AttrConvertFastMathToLLVM { +public: + AttrConvertFastMathToLLVM(SourceOp srcOp) { + // Copy the source attributes. + convertedAttr = NamedAttrList{srcOp->getAttrs()}; + // Get the name of the arith fastmath attribute. + llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); + // Remove the source fastmath attribute. 
+ auto arithFMFAttr = + convertedAttr.erase(arithFMFAttrName) + .template dyn_cast_or_null(); + if (arithFMFAttr) { + llvm::StringRef targetAttrName = TargetOp::getFastmathAttrName(); + convertedAttr.set(targetAttrName, + convertArithFastMathAttrToLLVM(arithFMFAttr)); + } + } + + ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } + +private: + NamedAttrList convertedAttr; +}; + +// Attribute converter that populates a NamedAttrList by removing the fastmath +// attribute from the source operation attributes. This may be useful for +// target operations that do not require the fastmath attribute, or for targets +// that do not yet support the LLVM fastmath attribute. +template +class AttrDropFastMath { +public: + AttrDropFastMath(SourceOp srcOp) { + // Copy the source attributes. + convertedAttr = NamedAttrList{srcOp->getAttrs()}; + // Get the name of the arith fastmath attribute. + llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); + // Remove the source fastmath attribute. 
+ convertedAttr.erase(arithFMFAttrName); + } + + ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } + +private: + NamedAttrList convertedAttr; +}; +} // namespace arith +} // namespace mlir + +#endif // MLIR_CONVERSION_ARITHCOMMON_ATTRTOLLVMCONVERTER_H diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td index 13d252cf056e5..78fd7bdf012f8 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td @@ -121,4 +121,9 @@ def FastMathFlags : I32BitEnumAttr< let printBitEnumPrimaryGroups = 1; } +def Arith_FastMathAttr : + EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + #endif // ARITH_BASE diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index 6ca74392f0565..3d6cef9705ebe 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -20,11 +20,6 @@ include "mlir/IR/BuiltinAttributeInterfaces.td" include "mlir/IR/OpAsmInterface.td" include "mlir/IR/EnumAttr.td" -def Arith_FastMathAttr : - EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - // Base class for Arith dialect ops. Ops in this dialect have no side // effects and can be applied element-wise to vectors and tensors. 
class Arith_Op traits = []> : diff --git a/mlir/include/mlir/Dialect/Math/IR/Math.h b/mlir/include/mlir/Dialect/Math/IR/Math.h index 6af358bf57b37..98416d1c9abdf 100644 --- a/mlir/include/mlir/Dialect/Math/IR/Math.h +++ b/mlir/include/mlir/Dialect/Math/IR/Math.h @@ -9,6 +9,7 @@ #ifndef MLIR_DIALECT_MATH_IR_MATH_H_ #define MLIR_DIALECT_MATH_IR_MATH_H_ +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td index 99e209000c0f5..a5b28bd0891c5 100644 --- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td +++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td @@ -9,6 +9,8 @@ #ifndef MATH_OPS #define MATH_OPS +include "mlir/Dialect/Arith/IR/ArithBase.td" +include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" include "mlir/Dialect/Math/IR/MathBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/VectorInterfaces.td" @@ -36,11 +38,16 @@ class Math_IntegerUnaryOp traits = []> : // operand and result of the same type. This type can be a floating point type, // vector or tensor thereof. class Math_FloatUnaryOp traits = []> : - Math_Op { - let arguments = (ins FloatLike:$operand); + Math_Op]> { + let arguments = (ins FloatLike:$operand, + DefaultValuedAttr:$fastmath); let results = (outs FloatLike:$result); - let assemblyFormat = "$operand attr-dict `:` type($result)"; + let assemblyFormat = [{ $operand (`fastmath` `` $fastmath^)? + attr-dict `:` type($result) }]; } // Base class for binary math operations on integer types. Require two @@ -58,22 +65,32 @@ class Math_IntegerBinaryOp traits = []> : // operands and one result of the same type. This type can be a floating point // type, vector or tensor thereof. 
class Math_FloatBinaryOp traits = []> : - Math_Op { - let arguments = (ins FloatLike:$lhs, FloatLike:$rhs); + Math_Op]> { + let arguments = (ins FloatLike:$lhs, FloatLike:$rhs, + DefaultValuedAttr:$fastmath); let results = (outs FloatLike:$result); - let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)"; + let assemblyFormat = [{ $lhs `,` $rhs (`fastmath` `` $fastmath^)? + attr-dict `:` type($result) }]; } // Base class for floating point ternary operations. Require three operands and // one result of the same type. This type can be a floating point type, vector // or tensor thereof. class Math_FloatTernaryOp traits = []> : - Math_Op { - let arguments = (ins FloatLike:$a, FloatLike:$b, FloatLike:$c); + Math_Op]> { + let arguments = (ins FloatLike:$a, FloatLike:$b, FloatLike:$c, + DefaultValuedAttr:$fastmath); let results = (outs FloatLike:$result); - let assemblyFormat = "$a `,` $b `,` $c attr-dict `:` type($result)"; + let assemblyFormat = [{ $a `,` $b `,` $c (`fastmath` `` $fastmath^)? + attr-dict `:` type($result) }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp b/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp new file mode 100644 index 0000000000000..8c5d76f9f2d72 --- /dev/null +++ b/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp @@ -0,0 +1,38 @@ +//===- AttrToLLVMConverter.cpp - Arith attributes conversion to LLVM ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" + +using namespace mlir; + +// Map arithmetic fastmath enum values to LLVMIR enum values. 
+LLVM::FastmathFlags +mlir::arith::convertArithFastMathFlagsToLLVM(arith::FastMathFlags arithFMF) { + LLVM::FastmathFlags llvmFMF{}; + const std::pair flags[] = { + {arith::FastMathFlags::nnan, LLVM::FastmathFlags::nnan}, + {arith::FastMathFlags::ninf, LLVM::FastmathFlags::ninf}, + {arith::FastMathFlags::nsz, LLVM::FastmathFlags::nsz}, + {arith::FastMathFlags::arcp, LLVM::FastmathFlags::arcp}, + {arith::FastMathFlags::contract, LLVM::FastmathFlags::contract}, + {arith::FastMathFlags::afn, LLVM::FastmathFlags::afn}, + {arith::FastMathFlags::reassoc, LLVM::FastmathFlags::reassoc}}; + for (auto fmfMap : flags) { + if (bitEnumContainsAny(arithFMF, fmfMap.first)) + llvmFMF = llvmFMF | fmfMap.second; + } + return llvmFMF; +} + +// Create an LLVM fastmath attribute from a given arithmetic fastmath attribute. +LLVM::FastmathFlagsAttr +mlir::arith::convertArithFastMathAttrToLLVM(arith::FastMathFlagsAttr fmfAttr) { + arith::FastMathFlags arithFMF = fmfAttr.getValue(); + return LLVM::FastmathFlagsAttr::get( + fmfAttr.getContext(), convertArithFastMathFlagsToLLVM(arithFMF)); +} diff --git a/mlir/lib/Conversion/ArithCommon/CMakeLists.txt b/mlir/lib/Conversion/ArithCommon/CMakeLists.txt new file mode 100644 index 0000000000000..888c45f2e52fe --- /dev/null +++ b/mlir/lib/Conversion/ArithCommon/CMakeLists.txt @@ -0,0 +1,10 @@ +add_mlir_conversion_library(MLIRArithAttrToLLVMConversion + AttrToLLVMConverter.cpp + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRArithDialect + MLIRLLVMDialect + ) diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index f2814b56d4d34..1409b7fe1bca8 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -8,6 +8,7 @@ #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" +#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include 
"mlir/Conversion/LLVMCommon/VectorPattern.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -24,93 +25,20 @@ using namespace mlir; namespace { -// Map arithmetic fastmath enum values to LLVMIR enum values. -static LLVM::FastmathFlags -convertArithFastMathFlagsToLLVM(arith::FastMathFlags arithFMF) { - LLVM::FastmathFlags llvmFMF{}; - const std::pair flags[] = { - {arith::FastMathFlags::nnan, LLVM::FastmathFlags::nnan}, - {arith::FastMathFlags::ninf, LLVM::FastmathFlags::ninf}, - {arith::FastMathFlags::nsz, LLVM::FastmathFlags::nsz}, - {arith::FastMathFlags::arcp, LLVM::FastmathFlags::arcp}, - {arith::FastMathFlags::contract, LLVM::FastmathFlags::contract}, - {arith::FastMathFlags::afn, LLVM::FastmathFlags::afn}, - {arith::FastMathFlags::reassoc, LLVM::FastmathFlags::reassoc}}; - for (auto fmfMap : flags) { - if (bitEnumContainsAny(arithFMF, fmfMap.first)) - llvmFMF = llvmFMF | fmfMap.second; - } - return llvmFMF; -} - -// Create an LLVM fastmath attribute from a given arithmetic fastmath attribute. -static LLVM::FastmathFlagsAttr -convertArithFastMathAttr(arith::FastMathFlagsAttr fmfAttr) { - arith::FastMathFlags arithFMF = fmfAttr.getValue(); - return LLVM::FastmathFlagsAttr::get( - fmfAttr.getContext(), convertArithFastMathFlagsToLLVM(arithFMF)); -} - -// Attribute converter that populates a NamedAttrList by removing the fastmath -// attribute from the source operation attributes, and replacing it with an -// equivalent LLVM fastmath attribute. -template -class AttrConvertFastMath { -public: - AttrConvertFastMath(SourceOp srcOp) { - // Copy the source attributes. - convertedAttr = NamedAttrList{srcOp->getAttrs()}; - // Get the name of the arith fastmath attribute. - llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); - // Remove the source fastmath attribute. 
- auto arithFMFAttr = convertedAttr.erase(arithFMFAttrName) - .template dyn_cast_or_null(); - if (arithFMFAttr) { - llvm::StringRef targetAttrName = TargetOp::getFastmathAttrName(); - convertedAttr.set(targetAttrName, convertArithFastMathAttr(arithFMFAttr)); - } - } - - ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } - -private: - NamedAttrList convertedAttr; -}; - -// Attribute converter that populates a NamedAttrList by removing the fastmath -// attribute from the source operation attributes. This may be useful for -// target operations that do not require the fastmath attribute, or for targets -// that do not yet support the LLVM fastmath attribute. -template -class AttrDropFastMath { -public: - AttrDropFastMath(SourceOp srcOp) { - // Copy the source attributes. - convertedAttr = NamedAttrList{srcOp->getAttrs()}; - // Get the name of the arith fastmath attribute. - llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); - // Remove the source fastmath attribute. 
- convertedAttr.erase(arithFMFAttrName); - } - - ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } - -private: - NamedAttrList convertedAttr; -}; - //===----------------------------------------------------------------------===// // Straightforward Op Lowerings //===----------------------------------------------------------------------===// -using AddFOpLowering = VectorConvertToLLVMPattern; +using AddFOpLowering = + VectorConvertToLLVMPattern; using AddIOpLowering = VectorConvertToLLVMPattern; using AndIOpLowering = VectorConvertToLLVMPattern; using BitcastOpLowering = VectorConvertToLLVMPattern; -using DivFOpLowering = VectorConvertToLLVMPattern; +using DivFOpLowering = + VectorConvertToLLVMPattern; using DivSIOpLowering = VectorConvertToLLVMPattern; using DivUIOpLowering = @@ -125,28 +53,30 @@ using FPToSIOpLowering = using FPToUIOpLowering = VectorConvertToLLVMPattern; // TODO: Add LLVM intrinsic support for fastmath -using MaxFOpLowering = - VectorConvertToLLVMPattern; +using MaxFOpLowering = VectorConvertToLLVMPattern; using MaxSIOpLowering = VectorConvertToLLVMPattern; using MaxUIOpLowering = VectorConvertToLLVMPattern; // TODO: Add LLVM intrinsic support for fastmath -using MinFOpLowering = - VectorConvertToLLVMPattern; +using MinFOpLowering = VectorConvertToLLVMPattern; using MinSIOpLowering = VectorConvertToLLVMPattern; using MinUIOpLowering = VectorConvertToLLVMPattern; -using MulFOpLowering = VectorConvertToLLVMPattern; +using MulFOpLowering = + VectorConvertToLLVMPattern; using MulIOpLowering = VectorConvertToLLVMPattern; -using NegFOpLowering = VectorConvertToLLVMPattern; +using NegFOpLowering = + VectorConvertToLLVMPattern; using OrIOpLowering = VectorConvertToLLVMPattern; // TODO: Add LLVM intrinsic support for fastmath -using RemFOpLowering = - VectorConvertToLLVMPattern; +using RemFOpLowering = VectorConvertToLLVMPattern; using RemSIOpLowering = VectorConvertToLLVMPattern; using RemUIOpLowering = @@ -160,8 +90,9 @@ using 
ShRUIOpLowering = VectorConvertToLLVMPattern; using SIToFPOpLowering = VectorConvertToLLVMPattern; -using SubFOpLowering = VectorConvertToLLVMPattern; +using SubFOpLowering = + VectorConvertToLLVMPattern; using SubIOpLowering = VectorConvertToLLVMPattern; using TruncFOpLowering = VectorConvertToLLVMPattern; diff --git a/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt index 45ee8708aa155..bb1fa2fbb6577 100644 --- a/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRArithToLLVM Core LINK_LIBS PUBLIC + MLIRArithAttrToLLVMConversion MLIRArithDialect MLIRLLVMCommonConversion MLIRLLVMDialect diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index a65814d36b5b4..62dae19a31344 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(AffineToStandard) add_subdirectory(AMDGPUToROCDL) +add_subdirectory(ArithCommon) add_subdirectory(ArithToLLVM) add_subdirectory(ArithToSPIRV) add_subdirectory(ArmNeon2dToIntr) diff --git a/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt b/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt index a6e6b4f56d37e..97393fc849691 100644 --- a/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRMathToLLVM Core LINK_LIBS PUBLIC + MLIRArithAttrToLLVMConversion MLIRLLVMCommonConversion MLIRLLVMDialect MLIRMathDialect diff --git a/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp b/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp index b67a86f443b5c..b5ce019b20832 100644 --- a/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp +++ b/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp @@ -8,6 +8,7 @@ #include "mlir/Conversion/MathToLLVM/MathToLLVM.h" +#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include 
"mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/LLVMCommon/VectorPattern.h" @@ -24,31 +25,39 @@ namespace mlir { using namespace mlir; namespace { -using AbsFOpLowering = VectorConvertToLLVMPattern; -using CeilOpLowering = VectorConvertToLLVMPattern; + +template +using ConvertFastMath = arith::AttrConvertFastMathToLLVM; + +template +using ConvertFMFMathToLLVMPattern = + VectorConvertToLLVMPattern; + +using AbsFOpLowering = ConvertFMFMathToLLVMPattern; +using CeilOpLowering = ConvertFMFMathToLLVMPattern; using CopySignOpLowering = - VectorConvertToLLVMPattern; -using CosOpLowering = VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; +using CosOpLowering = ConvertFMFMathToLLVMPattern; using CtPopFOpLowering = VectorConvertToLLVMPattern; -using Exp2OpLowering = VectorConvertToLLVMPattern; -using ExpOpLowering = VectorConvertToLLVMPattern; +using Exp2OpLowering = ConvertFMFMathToLLVMPattern; +using ExpOpLowering = ConvertFMFMathToLLVMPattern; using FloorOpLowering = - VectorConvertToLLVMPattern; -using FmaOpLowering = VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; +using FmaOpLowering = ConvertFMFMathToLLVMPattern; using Log10OpLowering = - VectorConvertToLLVMPattern; -using Log2OpLowering = VectorConvertToLLVMPattern; -using LogOpLowering = VectorConvertToLLVMPattern; -using PowFOpLowering = VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; +using Log2OpLowering = ConvertFMFMathToLLVMPattern; +using LogOpLowering = ConvertFMFMathToLLVMPattern; +using PowFOpLowering = ConvertFMFMathToLLVMPattern; using RoundEvenOpLowering = - VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; using RoundOpLowering = - VectorConvertToLLVMPattern; -using SinOpLowering = VectorConvertToLLVMPattern; -using SqrtOpLowering = VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; +using SinOpLowering = ConvertFMFMathToLLVMPattern; +using SqrtOpLowering = 
ConvertFMFMathToLLVMPattern; using FTruncOpLowering = - VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; // A `CtLz/CtTz/absi(a)` is converted into `CtLz/CtTz/absi(a, false)`. template @@ -113,6 +122,8 @@ struct ExpM1OpLowering : public ConvertOpToLLVMPattern { auto resultType = op.getResult().getType(); auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); + ConvertFastMath expAttrs(op); + ConvertFastMath subAttrs(op); if (!operandType.isa()) { LLVM::ConstantOp one; @@ -123,8 +134,10 @@ struct ExpM1OpLowering : public ConvertOpToLLVMPattern { } else { one = rewriter.create(loc, operandType, floatOne); } - auto exp = rewriter.create(loc, adaptor.getOperand()); - rewriter.replaceOpWithNewOp(op, operandType, exp, one); + auto exp = rewriter.create(loc, adaptor.getOperand(), + expAttrs.getAttrs()); + rewriter.replaceOpWithNewOp( + op, operandType, ValueRange{exp, one}, subAttrs.getAttrs()); return success(); } @@ -142,9 +155,10 @@ struct ExpM1OpLowering : public ConvertOpToLLVMPattern { floatOne); auto one = rewriter.create(loc, llvm1DVectorTy, splatAttr); - auto exp = - rewriter.create(loc, llvm1DVectorTy, operands[0]); - return rewriter.create(loc, llvm1DVectorTy, exp, one); + auto exp = rewriter.create( + loc, llvm1DVectorTy, operands[0], expAttrs.getAttrs()); + return rewriter.create( + loc, llvm1DVectorTy, ValueRange{exp, one}, subAttrs.getAttrs()); }, rewriter); } @@ -166,6 +180,8 @@ struct Log1pOpLowering : public ConvertOpToLLVMPattern { auto resultType = op.getResult().getType(); auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); + ConvertFastMath addAttrs(op); + ConvertFastMath logAttrs(op); if (!operandType.isa()) { LLVM::ConstantOp one = @@ -176,9 +192,11 @@ struct Log1pOpLowering : public ConvertOpToLLVMPattern { floatOne)) : rewriter.create(loc, operandType, floatOne); - auto add = rewriter.create(loc, operandType, one, - 
adaptor.getOperand()); - rewriter.replaceOpWithNewOp(op, operandType, add); + auto add = rewriter.create( + loc, operandType, ValueRange{one, adaptor.getOperand()}, + addAttrs.getAttrs()); + rewriter.replaceOpWithNewOp(op, operandType, ValueRange{add}, + logAttrs.getAttrs()); return success(); } @@ -196,9 +214,11 @@ struct Log1pOpLowering : public ConvertOpToLLVMPattern { floatOne); auto one = rewriter.create(loc, llvm1DVectorTy, splatAttr); - auto add = rewriter.create(loc, llvm1DVectorTy, one, - operands[0]); - return rewriter.create(loc, llvm1DVectorTy, add); + auto add = rewriter.create(loc, llvm1DVectorTy, + ValueRange{one, operands[0]}, + addAttrs.getAttrs()); + return rewriter.create( + loc, llvm1DVectorTy, ValueRange{add}, logAttrs.getAttrs()); }, rewriter); } @@ -220,6 +240,8 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { auto resultType = op.getResult().getType(); auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); + ConvertFastMath sqrtAttrs(op); + ConvertFastMath divAttrs(op); if (!operandType.isa()) { LLVM::ConstantOp one; @@ -230,8 +252,10 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { } else { one = rewriter.create(loc, operandType, floatOne); } - auto sqrt = rewriter.create(loc, adaptor.getOperand()); - rewriter.replaceOpWithNewOp(op, operandType, one, sqrt); + auto sqrt = rewriter.create(loc, adaptor.getOperand(), + sqrtAttrs.getAttrs()); + rewriter.replaceOpWithNewOp( + op, operandType, ValueRange{one, sqrt}, divAttrs.getAttrs()); return success(); } @@ -249,9 +273,10 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { floatOne); auto one = rewriter.create(loc, llvm1DVectorTy, splatAttr); - auto sqrt = - rewriter.create(loc, llvm1DVectorTy, operands[0]); - return rewriter.create(loc, llvm1DVectorTy, one, sqrt); + auto sqrt = rewriter.create( + loc, llvm1DVectorTy, operands[0], sqrtAttrs.getAttrs()); + return rewriter.create( + loc, llvm1DVectorTy, 
ValueRange{one, sqrt}, divAttrs.getAttrs()); }, rewriter); } diff --git a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir index bcdbad1709e93..8c7f031cb97d9 100644 --- a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir +++ b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir @@ -36,6 +36,18 @@ func.func @log1p(%arg0 : f32) { // ----- +// CHECK-LABEL: func @log1p_fmf( +// CHECK-SAME: f32 +func.func @log1p_fmf(%arg0 : f32) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 + // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %arg0 {fastmathFlags = #llvm.fastmath} : f32 + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %0 = math.log1p %arg0 fastmath : f32 + func.return +} + +// ----- + // CHECK-LABEL: func @log1p_2dvector( func.func @log1p_2dvector(%arg0 : vector<4x3xf32>) { // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> @@ -49,6 +61,19 @@ func.func @log1p_2dvector(%arg0 : vector<4x3xf32>) { // ----- +// CHECK-LABEL: func @log1p_2dvector_fmf( +func.func @log1p_2dvector_fmf(%arg0 : vector<4x3xf32>) { + // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : vector<3xf32> + // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %[[EXTRACT]] {fastmathFlags = #llvm.fastmath} : vector<3xf32> + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) {fastmathFlags = #llvm.fastmath} : (vector<3xf32>) -> vector<3xf32> + // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[LOG]], %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> + %0 = math.log1p %arg0 fastmath : vector<4x3xf32> + func.return +} + +// ----- + // CHECK-LABEL: func @expm1( // CHECK-SAME: f32 func.func @expm1(%arg0 : f32) { @@ -61,6 +86,42 @@ func.func @expm1(%arg0 : f32) { // ----- +// CHECK-LABEL: func @expm1_fmf( +// CHECK-SAME: f32 +func.func @expm1_fmf(%arg0 : 
f32) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] {fastmathFlags = #llvm.fastmath} : f32 + %0 = math.expm1 %arg0 fastmath : f32 + func.return +} + +// ----- + +// CHECK-LABEL: func @expm1_vector( +// CHECK-SAME: vector<4xf32> +func.func @expm1_vector(%arg0 : vector<4xf32>) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : vector<4xf32> + %0 = math.expm1 %arg0 : vector<4xf32> + func.return +} + +// ----- + +// CHECK-LABEL: func @expm1_vector_fmf( +// CHECK-SAME: vector<4xf32> +func.func @expm1_vector_fmf(%arg0 : vector<4xf32>) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) {fastmathFlags = #llvm.fastmath} : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] {fastmathFlags = #llvm.fastmath} : vector<4xf32> + %0 = math.expm1 %arg0 fastmath : vector<4xf32> + func.return +} + +// ----- + // CHECK-LABEL: func @rsqrt( // CHECK-SAME: f32 func.func @rsqrt(%arg0 : f32) { @@ -148,6 +209,18 @@ func.func @rsqrt_double(%arg0 : f64) { // ----- +// CHECK-LABEL: func @rsqrt_double_fmf( +// CHECK-SAME: f64 +func.func @rsqrt_double_fmf(%arg0 : f64) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f64) : f64 + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) {fastmathFlags = #llvm.fastmath} : (f64) -> f64 + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] {fastmathFlags = #llvm.fastmath} : f64 + %0 = math.rsqrt %arg0 fastmath : f64 + func.return +} + +// ----- + // CHECK-LABEL: func @rsqrt_vector( // CHECK-SAME: vector<4xf32> func.func @rsqrt_vector(%arg0 : vector<4xf32>) { 
@@ -160,6 +233,18 @@ func.func @rsqrt_vector(%arg0 : vector<4xf32>) { // ----- +// CHECK-LABEL: func @rsqrt_vector_fmf( +// CHECK-SAME: vector<4xf32> +func.func @rsqrt_vector_fmf(%arg0 : vector<4xf32>) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) {fastmathFlags = #llvm.fastmath} : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] {fastmathFlags = #llvm.fastmath} : vector<4xf32> + %0 = math.rsqrt %arg0 fastmath : vector<4xf32> + func.return +} + +// ----- + // CHECK-LABEL: func @rsqrt_multidim_vector( func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> @@ -210,3 +295,19 @@ func.func @trunc(%arg0 : f32) { %0 = math.trunc %arg0 : f32 func.return } + +// ----- + +// CHECK-LABEL: func @fastmath( +// CHECK-SAME: f32 +func.func @fastmath(%arg0 : f32, %arg1 : vector<4xf32>) { + // CHECK: llvm.intr.trunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %0 = math.trunc %arg0 fastmath : f32 + // CHECK: llvm.intr.pow(%arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + %1 = math.powf %arg0, %arg0 fastmath : f32 + // CHECK: llvm.intr.sqrt(%arg0) : (f32) -> f32 + %2 = math.sqrt %arg0 fastmath : f32 + // CHECK: llvm.intr.fma(%arg0, %arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32, f32) -> f32 + %3 = math.fma %arg0, %arg0, %arg0 fastmath : f32 + func.return +} diff --git a/mlir/test/Dialect/Math/ops.mlir b/mlir/test/Dialect/Math/ops.mlir index d984cbb66f8c2..7e121f80dd79e 100644 --- a/mlir/test/Dialect/Math/ops.mlir +++ b/mlir/test/Dialect/Math/ops.mlir @@ -269,3 +269,17 @@ func.func @trunc(%f: f32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>) { %2 = math.trunc %t : tensor<4x4x?xf32> return } + +// CHECK-LABEL: func @fastmath( +// CHECK-SAME: %[[F:.*]]: f32, %[[V:.*]]: vector<4xf32>, %[[T:.*]]: tensor<4x4x?xf32>) 
+func.func @fastmath(%f: f32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>) { + // CHECK: %{{.*}} = math.trunc %[[F]] fastmath : f32 + %0 = math.trunc %f fastmath : f32 + // CHECK: %{{.*}} = math.powf %[[V]], %[[V]] fastmath : vector<4xf32> + %1 = math.powf %v, %v fastmath : vector<4xf32> + // CHECK: %{{.*}} = math.fma %[[T]], %[[T]], %[[T]] : tensor<4x4x?xf32> + %2 = math.fma %t, %t, %t fastmath : tensor<4x4x?xf32> + // CHECK: %{{.*}} = math.absf %[[F]] fastmath : f32 + %3 = math.absf %f fastmath : f32 + return +} From 234e08ec3c1a94bf67ee78c25d1c5241cc69aaa5 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 4 Nov 2022 10:46:21 -0700 Subject: [PATCH 268/516] [lldb] Fix format specifier warning in EmulateInstructionRISCV Fixes warning: format specifies type 'unsigned long' but the argument has type 'lldb::addr_t' (aka 'unsigned long long') [-Wformat] --- .../Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp index 1d8f3a2750277..7b2f8c81e44bc 100644 --- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp +++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp @@ -504,7 +504,7 @@ llvm::Optional EmulateInstructionRISCV::Decode(uint32_t inst) { for (const InstrPattern &pat : PATTERNS) { if ((inst & pat.type_mask) == pat.eigen) { - LLDB_LOGF(log, "EmulateInstructionRISCV::%s: inst(%x at %lx) was decoded to %s", + LLDB_LOGF(log, "EmulateInstructionRISCV::%s: inst(%x at %llx) was decoded to %s", __FUNCTION__, inst, m_addr, pat.name); auto decoded = is_rvc ? 
pat.decode(try_rvc) : pat.decode(inst); return DecodeResult{decoded, inst, is_rvc, pat}; From 095ce655ec84fc21b6002808c698687c37f2bf12 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Tue, 1 Nov 2022 18:27:04 -0700 Subject: [PATCH 269/516] [mlir][math] Simplify pow(x, 0.75) into sqrt(sqrt(x)) * sqrt(x). Trivial simplification for CPU2017/503.bwaves resulting in 3.89% speed-up on icelake. Differential Revision: https://reviews.llvm.org/D137351 --- .../Math/Transforms/AlgebraicSimplification.cpp | 9 +++++++++ .../Dialect/Math/algebraic-simplification.mlir | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp b/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp index bea939a65022a..a1e6746b8fe9b 100644 --- a/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp +++ b/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp @@ -109,6 +109,15 @@ PowFStrengthReduction::matchAndRewrite(math::PowFOp op, return success(); } + // Replace `pow(x, 0.75)` with `sqrt(sqrt(x)) * sqrt(x)`. 
+ if (isExponentValue(0.75)) { + Value pow_half = rewriter.create(op.getLoc(), x); + Value pow_quarter = rewriter.create(op.getLoc(), pow_half); + rewriter.replaceOpWithNewOp( + op, ValueRange{pow_half, pow_quarter}); + return success(); + } + return failure(); } diff --git a/mlir/test/Dialect/Math/algebraic-simplification.mlir b/mlir/test/Dialect/Math/algebraic-simplification.mlir index 806779ad9198d..21c9f7a8e7f17 100644 --- a/mlir/test/Dialect/Math/algebraic-simplification.mlir +++ b/mlir/test/Dialect/Math/algebraic-simplification.mlir @@ -74,6 +74,22 @@ func.func @pow_rsqrt(%arg0: f32, %arg1 : vector<4xf32>) -> (f32, vector<4xf32>) return %0, %1 : f32, vector<4xf32> } +// CHECK-LABEL: @pow_0_75 +func.func @pow_0_75(%arg0: f32, %arg1 : vector<4xf32>) -> (f32, vector<4xf32>) { + // CHECK: %[[SQRT1S:.*]] = math.sqrt %arg0 + // CHECK: %[[SQRT2S:.*]] = math.sqrt %[[SQRT1S]] + // CHECK: %[[SCALAR:.*]] = arith.mulf %[[SQRT1S]], %[[SQRT2S]] + // CHECK: %[[SQRT1V:.*]] = math.sqrt %arg1 + // CHECK: %[[SQRT2V:.*]] = math.sqrt %[[SQRT1V]] + // CHECK: %[[VECTOR:.*]] = arith.mulf %[[SQRT1V]], %[[SQRT2V]] + // CHECK: return %[[SCALAR]], %[[VECTOR]] + %c = arith.constant 0.75 : f32 + %v = arith.constant dense <0.75> : vector<4xf32> + %0 = math.powf %arg0, %c : f32 + %1 = math.powf %arg1, %v : vector<4xf32> + return %0, %1 : f32, vector<4xf32> +} + // CHECK-LABEL: @ipowi_zero_exp( // CHECK-SAME: %[[ARG0:.+]]: i32 // CHECK-SAME: %[[ARG1:.+]]: vector<4xi32> From b6cf94e973f9659086633ca56dc51bc74d4125eb Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 4 Nov 2022 18:58:43 +0100 Subject: [PATCH 270/516] Fix format specifier warning in EmulateInstructionRISCV more Yes, the portable macro is still the only way to do this. 
--- .../Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp index 7b2f8c81e44bc..c05b43f300fda 100644 --- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp +++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp @@ -504,7 +504,9 @@ llvm::Optional EmulateInstructionRISCV::Decode(uint32_t inst) { for (const InstrPattern &pat : PATTERNS) { if ((inst & pat.type_mask) == pat.eigen) { - LLDB_LOGF(log, "EmulateInstructionRISCV::%s: inst(%x at %llx) was decoded to %s", + LLDB_LOGF(log, + "EmulateInstructionRISCV::%s: inst(%x at %" PRIx64 + ") was decoded to %s", __FUNCTION__, inst, m_addr, pat.name); auto decoded = is_rvc ? pat.decode(try_rvc) : pat.decode(inst); return DecodeResult{decoded, inst, is_rvc, pat}; From 93c7a9bf6cc142a5a37f22e7dc9fe2c4e20befe1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 4 Nov 2022 19:06:44 +0100 Subject: [PATCH 271/516] [bazel] Port 589764a38264 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 2208ff0ddf7e6..73b09e6fe87c3 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5923,6 +5923,17 @@ cc_library( ], ) +cc_library( + name = "ArithAttrToLLVMConversion", + srcs = glob(["lib/Conversion/ArithCommon/*.cpp"]), + hdrs = glob(["include/mlir/Conversion/ArithCommon/*.h"]), + includes = ["include"], + deps = [ + ":ArithDialect", + ":LLVMDialect", + ], +) + cc_library( name = "ArithToLLVM", srcs = glob(["lib/Conversion/ArithToLLVM/*.cpp"]), @@ -5930,6 +5941,7 @@ cc_library( includes = ["include"], deps = [ ":Analysis", + ":ArithAttrToLLVMConversion", 
":ArithDialect", ":ConversionPassIncGen", ":IR", @@ -5968,6 +5980,7 @@ cc_library( includes = ["include"], deps = [ ":Analysis", + ":ArithAttrToLLVMConversion", ":ConversionPassIncGen", ":DataLayoutInterfaces", ":IR", @@ -9277,6 +9290,7 @@ td_library( ], includes = ["include"], deps = [ + ":ArithOpsTdFiles", ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", From 1186e9d59fea662292cdf62fdd1544b5b27d7d37 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 4 Nov 2022 14:10:54 -0400 Subject: [PATCH 272/516] [LLVM][AMDGPU] Specialize 32-bit atomic fadd instruction for generic address space The 32-bit floating-point atomic add instructions on AMDGPUs do not support a "flat" or "generic" address space. So, if the address space cannot be determined statically, the AMDGPU backend will fall back to a CAS loop (which does support "flat" addressing). Instead, this patch emits runtime address-space checks to allow native FP atomic add instructions for global and LDS memory (and non-atomic FP add instructions for private/scratch memory). In order to do that, this patch introduces a new interface function `emitExpandAtomicRMW`. It is expected to be called when a common atomic expand doesn't work for a specific target, such as the case we discussed here. 
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D129690 --- llvm/include/llvm/CodeGen/TargetLowering.h | 8 + llvm/lib/CodeGen/AtomicExpandPass.cpp | 3 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 151 ++++++ llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 + llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 431 ++++++++++++++++++ ...and-atomic-rmw-fadd-flat-specialization.ll | 347 ++++++++++++++ .../AMDGPU/expand-atomic-rmw-fadd.ll | 52 ++- 7 files changed, 979 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll create mode 100644 llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a76fb97a14dc5..d0a7375c6a3b8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2000,6 +2000,14 @@ class TargetLoweringBase { llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); } + /// Perform an atomicrmw expansion in a target-specific way. This is + /// expected to be called when masked atomicrmw and bit test atomicrmw don't + /// work, and the target supports another way to lower atomicrmw. + virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const { + llvm_unreachable( + "Generic atomicrmw expansion unimplemented on this target"); + } + /// Perform a bit test atomicrmw using a target-specific intrinsic. This /// represents the combined bit test intrinsic which will be lowered at a late /// stage by the backend. 
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 8b46e9580729a..72262b4423fc1 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -610,6 +610,9 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { } case TargetLoweringBase::AtomicExpansionKind::NotAtomic: return lowerAtomicRMWInst(AI); + case TargetLoweringBase::AtomicExpansionKind::Expand: + TLI->emitExpandAtomicRMW(AI); + return true; default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2570e7a661e59..347b0ee9d3b46 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" @@ -12866,6 +12867,19 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts()) return ReportUnsafeHWInst(AtomicExpansionKind::None); + // If it is in flat address space, and the type is float, we will try to + // expand it, if the target supports global and lds atomic fadd. The + // reason we need that is, in the expansion, we emit the check of address + // space. If it is in global address space, we emit the global atomic + // fadd; if it is in shared address space, we emit the LDS atomic fadd. 
+ if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() && + Subtarget->hasLDSFPAtomicAdd()) { + if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts()) + return AtomicExpansionKind::Expand; + if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts()) + return AtomicExpansionKind::Expand; + } + return AtomicExpansionKind::CmpXChg; } @@ -13066,3 +13080,140 @@ bool SITargetLowering::checkForPhysRegDependency( } return false; } + +void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { + assert(Subtarget->hasAtomicFaddInsts() && + "target should have atomic fadd instructions"); + assert(AI->getType()->isFloatTy() && + AI->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS && + "generic atomicrmw expansion only supports FP32 operand in flat " + "address space"); + assert(AI->getOperation() == AtomicRMWInst::FAdd && + "only fadd is supported for now"); + + // Given: atomicrmw fadd float* %addr, float %val ordering + // + // With this expansion we produce the following code: + // [...] 
+ // %int8ptr = bitcast float* %addr to i8* + // br label %atomicrmw.check.shared + // + // atomicrmw.check.shared: + // %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %int8ptr) + // br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private + // + // atomicrmw.shared: + // %cast.shared = addrspacecast float* %addr to float addrspace(3)* + // %loaded.shared = atomicrmw fadd float addrspace(3)* %cast.shared, + // float %val ordering + // br label %atomicrmw.phi + // + // atomicrmw.check.private: + // %is.private = call i1 @llvm.amdgcn.is.private(i8* %int8ptr) + // br i1 %is.private, label %atomicrmw.private, label %atomicrmw.global + // + // atomicrmw.private: + // %cast.private = addrspacecast float* %addr to float addrspace(5)* + // %loaded.private = load float, float addrspace(5)* %cast.private + // %val.new = fadd float %loaded.private, %val + // store float %val.new, float addrspace(5)* %cast.private + // br label %atomicrmw.phi + // + // atomicrmw.global: + // %cast.global = addrspacecast float* %addr to float addrspace(1)* + // %loaded.global = atomicrmw fadd float addrspace(1)* %cast.global, + // float %val ordering + // br label %atomicrmw.phi + // + // atomicrmw.phi: + // %loaded.phi = phi float [ %loaded.shared, %atomicrmw.shared ], + // [ %loaded.private, %atomicrmw.private ], + // [ %loaded.global, %atomicrmw.global ] + // br label %atomicrmw.end + // + // atomicrmw.end: + // [...] 
+ + IRBuilder<> Builder(AI); + LLVMContext &Ctx = Builder.getContext(); + + BasicBlock *BB = Builder.GetInsertBlock(); + Function *F = BB->getParent(); + BasicBlock *ExitBB = + BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); + BasicBlock *CheckSharedBB = + BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB); + BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB); + BasicBlock *CheckPrivateBB = + BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB); + BasicBlock *PrivateBB = + BasicBlock::Create(Ctx, "atomicrmw.private", F, ExitBB); + BasicBlock *GlobalBB = BasicBlock::Create(Ctx, "atomicrmw.global", F, ExitBB); + BasicBlock *PhiBB = BasicBlock::Create(Ctx, "atomicrmw.phi", F, ExitBB); + + Value *Val = AI->getValOperand(); + Type *ValTy = Val->getType(); + Value *Addr = AI->getPointerOperand(); + PointerType *PtrTy = cast(Addr->getType()); + + auto CreateNewAtomicRMW = [AI](IRBuilder<> &Builder, Value *Addr, + Value *Val) -> Value * { + AtomicRMWInst *OldVal = + Builder.CreateAtomicRMW(AI->getOperation(), Addr, Val, AI->getAlign(), + AI->getOrdering(), AI->getSyncScopeID()); + SmallVector> MDs; + AI->getAllMetadata(MDs); + for (auto &P : MDs) + OldVal->setMetadata(P.first, P.second); + return OldVal; + }; + + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + Value *Int8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy()); + Builder.CreateBr(CheckSharedBB); + + Builder.SetInsertPoint(CheckSharedBB); + CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {}, + {Int8Ptr}, nullptr, "is.shared"); + Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB); + + Builder.SetInsertPoint(SharedBB); + Value *CastToLocal = Builder.CreateAddrSpaceCast( + Addr, + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::LOCAL_ADDRESS)); + Value *LoadedShared = CreateNewAtomicRMW(Builder, CastToLocal, Val); + Builder.CreateBr(PhiBB); + + Builder.SetInsertPoint(CheckPrivateBB); + 
CallInst *IsPrivate = Builder.CreateIntrinsic( + Intrinsic::amdgcn_is_private, {}, {Int8Ptr}, nullptr, "is.private"); + Builder.CreateCondBr(IsPrivate, PrivateBB, GlobalBB); + + Builder.SetInsertPoint(PrivateBB); + Value *CastToPrivate = Builder.CreateAddrSpaceCast( + Addr, + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::PRIVATE_ADDRESS)); + Value *LoadedPrivate = + Builder.CreateLoad(ValTy, CastToPrivate, "loaded.private"); + Value *NewVal = Builder.CreateFAdd(LoadedPrivate, Val, "val.new"); + Builder.CreateStore(NewVal, CastToPrivate); + Builder.CreateBr(PhiBB); + + Builder.SetInsertPoint(GlobalBB); + Value *CastToGlobal = Builder.CreateAddrSpaceCast( + Addr, + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::GLOBAL_ADDRESS)); + Value *LoadedGlobal = CreateNewAtomicRMW(Builder, CastToGlobal, Val); + Builder.CreateBr(PhiBB); + + Builder.SetInsertPoint(PhiBB); + PHINode *Loaded = Builder.CreatePHI(ValTy, 3, "loaded.phi"); + Loaded->addIncoming(LoadedShared, SharedBB); + Loaded->addIncoming(LoadedPrivate, PrivateBB); + Loaded->addIncoming(LoadedGlobal, GlobalBB); + Builder.CreateBr(ExitBB); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); +} diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 9e8ff565fe6bd..b47730f5d3b27 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -493,6 +493,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + void emitExpandAtomicRMW(AtomicRMWInst *AI) const override; const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent) const override; diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll new file mode 100644 index 0000000000000..a7584ac5cd787 --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -0,0 +1,431 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefix=GFX90A %s +; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s + +define float @syncscope_system(float* %addr, float %val) #0 { +; GFX908-LABEL: syncscope_system: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_load_dword v3, v[0:1] +; GFX908-NEXT: s_mov_b64 s[4:5], 0 +; GFX908-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v4, v3 +; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: buffer_wbinvl1_vol +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB0_1 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: v_mov_b32_e32 v0, v3 +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: syncscope_system: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_load_dword v3, v[0:1] +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v5, v3 +; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX90A-NEXT: buffer_wbl2 +; 
GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: syncscope_system: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_load_dword v3, v[0:1] +; GFX940-NEXT: s_mov_b64 s[0:1], 0 +; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_mov_b32_e32 v5, v3 +; GFX940-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX940-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: buffer_inv sc0 sc1 +; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX940-NEXT: s_cbranch_execnz .LBB0_1 +; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX940-NEXT: v_mov_b32_e32 v0, v3 +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: syncscope_system: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_load_b32 v3, v[0:1] +; GFX1100-NEXT: s_mov_b32 s0, 0 +; GFX1100-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v4, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; 
GFX1100-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: buffer_gl1_inv +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 +; GFX1100-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: s_cbranch_execnz .LBB0_1 +; GFX1100-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1100-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: v_mov_b32_e32 v0, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val seq_cst + ret float %res +} + +define float @syncscope_workgroup_rtn(float* %addr, float %val) #0 { +; GFX908-LABEL: syncscope_workgroup_rtn: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_load_dword v3, v[0:1] +; GFX908-NEXT: s_mov_b64 s[4:5], 0 +; GFX908-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v4, v3 +; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB1_1 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: v_mov_b32_e32 v0, v3 +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: syncscope_workgroup_rtn: +; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16) +; GFX90A-NEXT: s_lshl_b32 s4, 
s4, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1 +; GFX90A-NEXT: ; implicit-def: $vgpr3 +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB1_6 +; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.check.private +; GFX90A-NEXT: s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GFX90A-NEXT: s_lshl_b32 s6, s6, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1 +; GFX90A-NEXT: ; implicit-def: $vgpr3 +; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB1_3 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.global +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: global_atomic_add_f32 v3, v[0:1], v2, off glc +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: .LBB1_3: ; %Flow +; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB1_5 +; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.private +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_add_f32_e32 v1, v3, v2 +; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: .LBB1_5: ; %Flow1 +; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: .LBB1_6: ; %Flow2 +; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB1_8 +; GFX90A-NEXT: ; %bb.7: ; %atomicrmw.shared +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ds_add_rtn_f32 v3, v0, v2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: .LBB1_8: ; %atomicrmw.phi +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: 
v_mov_b32_e32 v0, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: syncscope_workgroup_rtn: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: syncscope_workgroup_rtn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 glc +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 { +; GFX908-LABEL: syncscope_workgroup_nortn: +; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16) +; GFX908-NEXT: s_lshl_b32 s4, s4, 16 +; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1 +; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB2_3 +; GFX908-NEXT: ; %bb.1: ; %Flow2 +; GFX908-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB2_8 +; GFX908-NEXT: .LBB2_2: ; %atomicrmw.phi +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: s_setpc_b64 s[30:31] +; GFX908-NEXT: .LBB2_3: ; %atomicrmw.check.private +; GFX908-NEXT: s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GFX908-NEXT: s_lshl_b32 s6, s6, 16 +; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1 +; GFX908-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX908-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GFX908-NEXT: s_cbranch_execz .LBB2_5 +; GFX908-NEXT: ; %bb.4: ; %atomicrmw.global +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: 
global_atomic_add_f32 v[0:1], v2, off +; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX908-NEXT: ; implicit-def: $vgpr2 +; GFX908-NEXT: .LBB2_5: ; %Flow +; GFX908-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; GFX908-NEXT: s_cbranch_execz .LBB2_7 +; GFX908-NEXT: ; %bb.6: ; %atomicrmw.private +; GFX908-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX908-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX908-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX908-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX908-NEXT: .LBB2_7: ; %Flow1 +; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX908-NEXT: ; implicit-def: $vgpr2 +; GFX908-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX908-NEXT: s_cbranch_execz .LBB2_2 +; GFX908-NEXT: .LBB2_8: ; %atomicrmw.shared +; GFX908-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX908-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: ds_add_f32 v0, v2 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: syncscope_workgroup_nortn: +; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16) +; GFX90A-NEXT: s_lshl_b32 s4, s4, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1 +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB2_3 +; GFX90A-NEXT: ; %bb.1: ; %Flow2 +; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB2_8 +; GFX90A-NEXT: .LBB2_2: ; %atomicrmw.phi +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; GFX90A-NEXT: .LBB2_3: ; %atomicrmw.check.private +; GFX90A-NEXT: 
s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GFX90A-NEXT: s_lshl_b32 s6, s6, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1 +; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB2_5 +; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: .LBB2_5: ; %Flow +; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB2_7 +; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.private +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: .LBB2_7: ; %Flow1 +; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB2_2 +; GFX90A-NEXT: .LBB2_8: ; %atomicrmw.shared +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: ds_add_f32 v0, v2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: syncscope_workgroup_nortn: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: syncscope_workgroup_nortn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; 
GFX1100-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX1100-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret void +} + +define float @no_unsafe(float* %addr, float %val) { +; GFX908-LABEL: no_unsafe: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_load_dword v3, v[0:1] +; GFX908-NEXT: s_mov_b64 s[4:5], 0 +; GFX908-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v4, v3 +; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB3_1 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: v_mov_b32_e32 v0, v3 +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: no_unsafe: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_load_dword v3, v[0:1] +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v5, v3 +; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB3_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; 
GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: no_unsafe: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_load_dword v3, v[0:1] +; GFX940-NEXT: s_mov_b64 s[0:1], 0 +; GFX940-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_mov_b32_e32 v5, v3 +; GFX940-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX940-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX940-NEXT: s_cbranch_execnz .LBB3_1 +; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX940-NEXT: v_mov_b32_e32 v0, v3 +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: no_unsafe: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_load_b32 v3, v[0:1] +; GFX1100-NEXT: s_mov_b32 s0, 0 +; GFX1100-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v4, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 +; GFX1100-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: s_cbranch_execnz .LBB3_1 +; GFX1100-NEXT: ; %bb.2: ; 
%atomicrmw.end +; GFX1100-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: v_mov_b32_e32 v0, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll new file mode 100644 index 0000000000000..243927c45f89d --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll @@ -0,0 +1,347 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -atomic-expand %s | FileCheck -check-prefix=GFX908 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -atomic-expand %s | FileCheck -check-prefix=GFX90A %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -atomic-expand %s | FileCheck -check-prefix=GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -atomic-expand %s | FileCheck -check-prefix=GFX1100 %s + +define float @syncscope_system(float* %addr, float %val) #0 { +; GFX908-LABEL: @syncscope_system( +; GFX908-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX908: atomicrmw.start: +; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX908-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX908-NEXT: [[NEWLOADED:%.*]] 
= extractvalue { i32, i1 } [[TMP5]], 0 +; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret float [[TMP6]] +; +; GFX90A-LABEL: @syncscope_system( +; GFX90A-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX90A: atomicrmw.start: +; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret float [[TMP6]] +; +; GFX940-LABEL: @syncscope_system( +; GFX940-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX940: atomicrmw.start: +; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX940-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; 
GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX940: atomicrmw.end: +; GFX940-NEXT: ret float [[TMP6]] +; +; GFX1100-LABEL: @syncscope_system( +; GFX1100-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX1100-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX1100: atomicrmw.start: +; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX1100-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX1100-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX1100-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX1100-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX1100-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX1100-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX1100: atomicrmw.end: +; GFX1100-NEXT: ret float [[TMP6]] +; +; GFX11-LABEL: @syncscope_system( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue 
{ i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret float [[TMP6]] + %res = atomicrmw fadd float* %addr, float %val seq_cst + ret float %res +} + +define float @syncscope_workgroup_rtn(float* %addr, float %val) #0 { +; GFX908-LABEL: @syncscope_workgroup_rtn( +; GFX908-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX908: atomicrmw.start: +; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX908-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret float [[TMP6]] +; +; GFX90A-LABEL: @syncscope_workgroup_rtn( +; GFX90A-NEXT: [[TMP1:%.*]] = bitcast float* [[ADDR:%.*]] to i8* +; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX90A: atomicrmw.check.shared: +; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX90A: atomicrmw.shared: +; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(3)* +; 
GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX90A: atomicrmw.check.private: +; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX90A: atomicrmw.private: +; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(5)* +; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.global: +; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(1)* +; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.phi: +; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret float [[LOADED_PHI]] +; +; GFX940-LABEL: @syncscope_workgroup_rtn( +; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX940-NEXT: ret float [[RES]] +; +; GFX1100-LABEL: @syncscope_workgroup_rtn( +; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX1100-NEXT: ret float [[RES]] +; +; GFX11-LABEL: @syncscope_workgroup_rtn( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; 
GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret float [[TMP6]] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 { +; GFX908-LABEL: @syncscope_workgroup_nortn( +; GFX908-NEXT: [[TMP1:%.*]] = bitcast float* [[ADDR:%.*]] to i8* +; GFX908-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX908: atomicrmw.check.shared: +; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX908: atomicrmw.shared: +; GFX908-NEXT: [[TMP2:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(3)* +; GFX908-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX908-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX908: atomicrmw.check.private: +; GFX908-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX908-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX908: atomicrmw.private: +; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast float* [[ADDR]] 
to float addrspace(5)* +; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; GFX908-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX908-NEXT: store float [[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_PHI]] +; GFX908: atomicrmw.global: +; GFX908-NEXT: [[TMP5:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(1)* +; GFX908-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4 +; GFX908-NEXT: br label [[ATOMICRMW_PHI]] +; GFX908: atomicrmw.phi: +; GFX908-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret void +; +; GFX90A-LABEL: @syncscope_workgroup_nortn( +; GFX90A-NEXT: [[TMP1:%.*]] = bitcast float* [[ADDR:%.*]] to i8* +; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX90A: atomicrmw.check.shared: +; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX90A: atomicrmw.shared: +; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(3)* +; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX90A: atomicrmw.check.private: +; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX90A: atomicrmw.private: +; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(5)* +; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; 
GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.global: +; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(1)* +; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.phi: +; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret void +; +; GFX940-LABEL: @syncscope_workgroup_nortn( +; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX940-NEXT: ret void +; +; GFX1100-LABEL: @syncscope_workgroup_nortn( +; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX1100-NEXT: ret void +; +; GFX11-LABEL: @syncscope_workgroup_nortn( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: 
[[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret void + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret void +} + +define float @no_unsafe(float* %addr, float %val) { +; GFX908-LABEL: @no_unsafe( +; GFX908-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX908: atomicrmw.start: +; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX908-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret float [[TMP6]] +; +; GFX90A-LABEL: @no_unsafe( +; GFX90A-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX90A: atomicrmw.start: +; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] 
syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret float [[TMP6]] +; +; GFX940-LABEL: @no_unsafe( +; GFX940-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX940: atomicrmw.start: +; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX940-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX940: atomicrmw.end: +; GFX940-NEXT: ret float [[TMP6]] +; +; GFX1100-LABEL: @no_unsafe( +; GFX1100-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX1100-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX1100: atomicrmw.start: +; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX1100-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX1100-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX1100-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX1100-NEXT: 
[[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX1100-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX1100-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX1100: atomicrmw.end: +; GFX1100-NEXT: ret float [[TMP6]] +; +; GFX11-LABEL: @no_unsafe( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret float [[TMP6]] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll index 19ce5effd76c6..99bdde2a26301 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll @@ -263,21 +263,33 
@@ define float @test_atomicrmw_fadd_f32_flat_unsafe(float* %ptr, float %value) #0 ; GFX908-NEXT: ret float [[TMP6]] ; ; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( -; GFX90A-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 -; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX90A: atomicrmw.start: -; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] -; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* -; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 -; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4 -; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX90A-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i8* +; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX90A: atomicrmw.check.shared: +; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX90A: atomicrmw.shared: +; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast float* [[PTR]] to float addrspace(3)* +; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX90A: atomicrmw.check.private: +; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX90A: 
atomicrmw.private: +; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast float* [[PTR]] to float addrspace(5)* +; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]] +; GFX90A-NEXT: store float [[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.global: +; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast float* [[PTR]] to float addrspace(1)* +; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VALUE]] syncscope("wavefront") monotonic, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.phi: +; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]] ; GFX90A: atomicrmw.end: -; GFX90A-NEXT: ret float [[TMP6]] +; GFX90A-NEXT: ret float [[LOADED_PHI]] ; ; GFX940-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 @@ -912,6 +924,18 @@ define half @test_atomicrmw_fadd_f16_global_align4(half addrspace(1)* %ptr, half ; GFX908-LABEL: @test_atomicrmw_fadd_f16_global_align4( ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 ; GFX908-NEXT: ret half [[RES]] +; +; GFX90A-LABEL: @test_atomicrmw_fadd_f16_global_align4( +; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 +; GFX90A-NEXT: ret half [[RES]] +; +; GFX940-LABEL: @test_atomicrmw_fadd_f16_global_align4( +; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 +; GFX940-NEXT: ret half [[RES]] +; +; GFX11-LABEL: @test_atomicrmw_fadd_f16_global_align4( +; GFX11-NEXT: [[RES:%.*]] 
= atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 +; GFX11-NEXT: ret half [[RES]] ; %res = atomicrmw fadd half addrspace(1)* %ptr, half %value seq_cst, align 4 ret half %res From 9a456b7ad3125834377b8aab1598785e8559c224 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Nov 2022 18:42:06 +0000 Subject: [PATCH 273/516] [IndVars] Forget SCEV for replaced PHI. Additional SCEV verification highlighted a case where the cached loop dispositions were incorrect after simplifying a phi node in IndVars. Fix it by invalidating the phi before replacing it. Fixes #58750 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 8 +-- .../invalidate-modified-lcssa-phi.ll | 52 +++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 9efb40f231e40..f6431b77d8027 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1308,7 +1308,8 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, } static void replaceLoopPHINodesWithPreheaderValues( - LoopInfo *LI, Loop *L, SmallVectorImpl &DeadInsts) { + LoopInfo *LI, Loop *L, SmallVectorImpl &DeadInsts, + ScalarEvolution &SE) { assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); auto *LoopPreheader = L->getLoopPreheader(); auto *LoopHeader = L->getHeader(); @@ -1317,6 +1318,7 @@ static void replaceLoopPHINodesWithPreheaderValues( auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); for (User *U : PN.users()) Worklist.push_back(cast(U)); + SE.forgetValue(&PN); PN.replaceAllUsesWith(PreheaderIncoming); DeadInsts.emplace_back(&PN); } @@ -1588,7 +1590,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // unconditional exit, we can still replace header phis with their // preheader value. 
if (!L->contains(BI->getSuccessor(CI->isNullValue()))) - replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); return true; } @@ -1675,7 +1677,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // the header PHIs with values coming from the preheader. if (ExitCount->isZero()) { foldExit(L, ExitingBB, true, DeadInsts); - replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); Changed = true; continue; } diff --git a/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll b/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll index fb748ae4dc494..856fc37620499 100644 --- a/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll +++ b/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll @@ -156,3 +156,55 @@ exit: %lcssa = phi i16 [ %sum.next, %loop ] ret i16 0 } + +define i32 @pr58750(i16 %a, ptr %dst, i1 %c.0) { +; CHECK-LABEL: @pr58750( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP186_NOT:%.*]] = icmp eq i16 [[A:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP186_NOT]]) +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[P_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LCSSA:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[P_0]], 0 +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: store i16 0, ptr [[DST:%.*]], align 1 +; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_LATCH]] +; CHECK: outer.latch: +; CHECK-NEXT: [[LCSSA]] = phi i32 [ [[XOR]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C_0:%.*]], label [[OUTER_HEADER]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[LCSSA_LCSSA:%.*]] = phi i32 [ [[LCSSA]], [[OUTER_LATCH]] ] +; CHECK-NEXT: ret i32 [[LCSSA_LCSSA]] +; +entry: + %cmp186.not = icmp eq i16 %a, 0 + call void @llvm.assume(i1 %cmp186.not) + br label %outer.header 
+ +outer.header: + %p.0 = phi i32 [ 0, %entry ], [ %lcssa, %outer.latch ] + br label %inner + +inner: + %inner.iv = phi i16 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %p.1 = phi i32 [ %p.0, %outer.header ], [ %xor, %inner ] + store i16 %inner.iv, ptr %dst, align 1 + %conv = sext i16 %inner.iv to i32 + %xor = xor i32 %p.1, %conv + %inner.iv.next = add nuw i16 %inner.iv, 1 + %c.1 = icmp ult i16 %inner.iv.next, %a + br i1 %c.1, label %inner, label %outer.latch + +outer.latch: + %lcssa = phi i32 [ %xor, %inner ] + br i1 %c.0, label %outer.header, label %exit + +exit: + ret i32 %lcssa +} + +; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) #1 + + From c064545403917bedd450e07209e7870f1773f90f Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Fri, 4 Nov 2022 15:10:18 -0400 Subject: [PATCH 274/516] [mlir][spirv] Do not truncate i/f64 -> i/f32 in SPIRVConversion This truncation can be unexpected and break program behavior. Dedicated emulation passes should be used instead. Also rename pass options to "emulate-lt-32-bit-scalar-types". 
Fixes: https://github.com/llvm/llvm-project/issues/57917 Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D137115 --- mlir/include/mlir/Conversion/Passes.td | 24 +-- .../SPIRV/Transforms/SPIRVConversion.h | 16 +- .../Conversion/ArithToSPIRV/ArithToSPIRV.cpp | 2 +- .../ControlFlowToSPIRVPass.cpp | 2 +- .../FuncToSPIRV/FuncToSPIRVPass.cpp | 2 +- .../TensorToSPIRV/TensorToSPIRVPass.cpp | 2 +- .../SPIRV/Transforms/SPIRVConversion.cpp | 9 +- .../arith-to-spirv-unsupported.mlir | 70 +++++++- .../ArithToSPIRV/arith-to-spirv.mlir | 163 +----------------- .../FuncToSPIRV/types-to-spirv.mlir | 70 ++++---- 10 files changed, 138 insertions(+), 222 deletions(-) diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 66ac9eedf1bfb..cef82f1e29ff1 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -118,10 +118,10 @@ def ConvertArithToSPIRV : Pass<"convert-arith-to-spirv"> { let constructor = "mlir::arith::createConvertArithToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", "emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - "Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support">, + "Emulate narrower scalar types with 32-bit ones if not supported by " + "the target">, Option<"enableFastMath", "enable-fast-math", "bool", /*default=*/"false", "Enable fast math mode (assuming no NaN and infinity for floating " @@ -259,10 +259,10 @@ def ConvertControlFlowToSPIRV : Pass<"convert-cf-to-spirv"> { let constructor = "mlir::createConvertControlFlowToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", "emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - 
"Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support"> + "Emulate narrower scalar types with 32-bit ones if not supported by" + " the target"> ]; } @@ -320,10 +320,10 @@ def ConvertFuncToSPIRV : Pass<"convert-func-to-spirv"> { let constructor = "mlir::createConvertFuncToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", "emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - "Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support"> + "Emulate narrower scalar types with 32-bit ones if not supported by" + " the target"> ]; } @@ -815,10 +815,10 @@ def ConvertTensorToSPIRV : Pass<"convert-tensor-to-spirv"> { let constructor = "mlir::createConvertTensorToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", "emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - "Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support"> + "Emulate narrower scalar types with 32-bit ones if not supported by" + " the target"> ]; } diff --git a/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h b/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h index 9b480f6cc9e3a..7d362526cc22f 100644 --- a/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h +++ b/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h @@ -30,21 +30,21 @@ struct SPIRVConversionOptions { /// The number of bits to store a boolean value. unsigned boolNumBits{8}; - /// Whether to emulate non-32-bit scalar types with 32-bit scalar types if - /// no native support. + /// Whether to emulate narrower scalar types with 32-bit scalar types if not + /// supported by the target. 
/// /// Non-32-bit scalar types require special hardware support that may not /// exist on all GPUs. This is reflected in SPIR-V as that non-32-bit scalar /// types require special capabilities or extensions. This option controls - /// whether to use 32-bit types to emulate, if a scalar type of a certain - /// bitwidth is not supported in the target environment. This requires the - /// runtime to also feed in data with a matched bitwidth and layout for - /// interface types. The runtime can do that by inspecting the SPIR-V - /// module. + /// whether to use 32-bit types to emulate < 32-bits-wide scalars, if a scalar + /// type of a certain bitwidth is not supported in the target environment. + /// This requires the runtime to also feed in data with a matched bitwidth and + /// layout for interface types. The runtime can do that by inspecting the + /// SPIR-V module. /// /// If the original scalar type has less than 32-bit, a multiple of its /// values will be packed into one 32-bit value to be memory efficient. - bool emulateNon32BitScalarTypes{true}; + bool emulateLT32BitScalarTypes{true}; /// Use 64-bit integers to convert index types. 
bool use64bitIndex{false}; diff --git a/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp b/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp index 2452928dd4503..cf65beb924fb7 100644 --- a/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp +++ b/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp @@ -1031,7 +1031,7 @@ struct ConvertArithToSPIRVPass auto target = SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; options.enableFastMathMode = this->enableFastMath; SPIRVTypeConverter typeConverter(targetAttr, options); diff --git a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp index 0d1e8b8079465..d8aecae257b46 100644 --- a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp +++ b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp @@ -41,7 +41,7 @@ void ConvertControlFlowToSPIRVPass::runOnOperation() { SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; SPIRVTypeConverter typeConverter(targetAttr, options); RewritePatternSet patterns(context); diff --git a/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp b/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp index a82ba5dd12a5d..9fffc5e3182e9 100644 --- a/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp +++ b/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp @@ -40,7 +40,7 @@ void ConvertFuncToSPIRVPass::runOnOperation() { SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; SPIRVTypeConverter 
typeConverter(targetAttr, options); RewritePatternSet patterns(context); diff --git a/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp b/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp index 6b1145c464787..313172614268d 100644 --- a/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp +++ b/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp @@ -38,7 +38,7 @@ class ConvertTensorToSPIRVPass SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; SPIRVTypeConverter typeConverter(targetAttr, options); RewritePatternSet patterns(context); diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index 2514cfe0301a1..286ff0b7eff2d 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -220,9 +220,16 @@ static Type convertScalarType(const spirv::TargetEnv &targetEnv, // Otherwise we need to adjust the type, which really means adjusting the // bitwidth given this is a scalar type. + if (!options.emulateLT32BitScalarTypes) + return nullptr; - if (!options.emulateNon32BitScalarTypes) + // We only emulate narrower scalar types here and do not truncate results. 
+ if (type.getIntOrFloatBitWidth() > 32) { + LLVM_DEBUG(llvm::dbgs() + << type + << " not converted to 32-bit for SPIR-V to avoid truncation\n"); return nullptr; + } if (auto floatType = type.dyn_cast()) { LLVM_DEBUG(llvm::dbgs() << type << " converted to 32-bit for SPIR-V\n"); diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir index 967adbc84a3bb..f6e84e80bbf51 100644 --- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir +++ b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir @@ -49,7 +49,15 @@ func.func @int_vector4_invalid(%arg0: vector<2xi16>) { // ----- -func.func @unsupported_constant_0() { +func.func @unsupported_constant_i64_0() { + // expected-error @+1 {{failed to legalize operation 'arith.constant'}} + %0 = arith.constant 0 : i64 + return +} + +// ----- + +func.func @unsupported_constant_i64_1() { // expected-error @+1 {{failed to legalize operation 'arith.constant'}} %0 = arith.constant 4294967296 : i64 // 2^32 return @@ -57,16 +65,68 @@ func.func @unsupported_constant_0() { // ----- -func.func @unsupported_constant_1() { +func.func @unsupported_constant_vector_2xi64_0() { + // expected-error @+1 {{failed to legalize operation 'arith.constant'}} + %1 = arith.constant dense<0> : vector<2xi64> + return +} + +// ----- + +func.func @unsupported_constant_f64_0() { // expected-error @+1 {{failed to legalize operation 'arith.constant'}} - %1 = arith.constant -2147483649 : i64 // -2^31 - 1 + %1 = arith.constant 0.0 : f64 return } // ----- -func.func @unsupported_constant_2() { +func.func @unsupported_constant_vector_2xf64_0() { // expected-error @+1 {{failed to legalize operation 'arith.constant'}} - %2 = arith.constant -2147483649 : i64 // -2^31 - 1 + %1 = arith.constant dense<0.0> : vector<2xf64> return } + +// ----- + +func.func @unsupported_constant_tensor_2xf64_0() { + // expected-error @+1 {{failed to legalize operation 
'arith.constant'}} + %1 = arith.constant dense<0.0> : tensor<2xf64> + return +} + +//===----------------------------------------------------------------------===// +// Type emulation +//===----------------------------------------------------------------------===// + +// ----- + +module attributes { + spirv.target_env = #spirv.target_env< + #spirv.vce, #spirv.resource_limits<>> +} { + +// Check that we do not emulate i64 by truncating to i32. +func.func @unsupported_i64(%arg0: i64) { + // expected-error@+1 {{failed to legalize operation 'arith.addi'}} + %2 = arith.addi %arg0, %arg0: i64 + return +} + +} // end module + +// ----- + +module attributes { + spirv.target_env = #spirv.target_env< + #spirv.vce, #spirv.resource_limits<>> +} { + +// Check that we do not emulate f64 by truncating to f32. +func.func @unsupported_f64(%arg0: f64) { + // expected-error@+1 {{failed to legalize operation 'arith.addf'}} + %2 = arith.addf %arg0, %arg0: f64 + return +} + +} // end module diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir index df6806a0e4bd1..d561cd2c26f29 100644 --- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir +++ b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir @@ -513,7 +513,7 @@ func.func @constant_size1() { // ----- -// Check that constants are converted to 32-bit when no special capability. +// Check that constants are widened to 32-bit when no special capability. 
module attributes { spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> } { @@ -533,51 +533,26 @@ func.func @constant_16bit() { return } -// CHECK-LABEL: @constant_64bit -func.func @constant_64bit() { - // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant 4 : i64 - // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant 5.0 : f64 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi32> - %2 = arith.constant dense<[2, 3]> : vector<2xi64> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf32> : !spirv.array<5 x f32> - %3 = arith.constant dense<4.0> : tensor<5xf64> - // CHECK: spirv.Constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf32> : !spirv.array<4 x f32> - %4 = arith.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf16> - return -} - // CHECK-LABEL: @constant_size1 func.func @constant_size1() { // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant dense<4> : vector<1xi64> + %0 = arith.constant dense<4> : vector<1xi16> // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant dense<5.0> : tensor<1xf64> + %1 = arith.constant dense<5.0> : tensor<1xf16> return } // CHECK-LABEL: @corner_cases func.func @corner_cases() { - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %0 = arith.constant 4294967295 : i64 // 2^32 - 1 - // CHECK: %{{.*}} = spirv.Constant 2147483647 : i32 - %1 = arith.constant 2147483647 : i64 // 2^31 - 1 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %2 = arith.constant 2147483648 : i64 // 2^31 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %3 = arith.constant -2147483648 : i64 // -2^31 - - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %5 = arith.constant -1 : i64 + // CHECK: %{{.*}} = spirv.Constant -1 : i32 + %5 = arith.constant -1 : i16 // CHECK: %{{.*}} = spirv.Constant -2 : i32 - %6 = arith.constant -2 : i64 + %6 = arith.constant -2 : i16 // CHECK: %{{.*}} = spirv.Constant -1 : i32 %7 = arith.constant -1 : index // CHECK: %{{.*}} = 
spirv.Constant -2 : i32 %8 = arith.constant -2 : index - // CHECK: spirv.Constant false %9 = arith.constant false // CHECK: spirv.Constant true @@ -903,29 +878,13 @@ module attributes { } { // CHECK-LABEL: @fptrunc1 -// CHECK-SAME: %[[A:.*]]: f64 -func.func @fptrunc1(%arg0 : f64) -> f16 { - // CHECK: %[[ARG:.+]] = builtin.unrealized_conversion_cast %[[A]] : f64 to f32 - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 - %0 = arith.truncf %arg0 : f64 to f16 - return %0: f16 -} - -// CHECK-LABEL: @fptrunc2 // CHECK-SAME: %[[ARG:.*]]: f32 -func.func @fptrunc2(%arg0: f32) -> f16 { +func.func @fptrunc1(%arg0: f32) -> f16 { // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 %0 = arith.truncf %arg0 : f32 to f16 return %0: f16 } -// CHECK-LABEL: @sitofp -func.func @sitofp(%arg0 : i64) -> f64 { - // CHECK: spirv.ConvertSToF %{{.*}} : i32 to f32 - %0 = arith.sitofp %arg0 : i64 to f64 - return %0: f64 -} - } // end module // ----- @@ -1209,11 +1168,9 @@ func.func @int_vector23(%arg0: vector<2xi8>, %arg1: vector<3xi16>) { } // CHECK-LABEL: @float_scalar -func.func @float_scalar(%arg0: f16, %arg1: f64) { +func.func @float_scalar(%arg0: f16) { // CHECK: spirv.FAdd %{{.*}}, %{{.*}}: f32 %0 = arith.addf %arg0, %arg0: f16 - // CHECK: spirv.FMul %{{.*}}, %{{.*}}: f32 - %1 = arith.mulf %arg1, %arg1: f64 return } @@ -1513,74 +1470,6 @@ func.func @constant_64bit() { // ----- -// Check that constants are converted to 32-bit when no special capability. 
-module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @constant_16bit -func.func @constant_16bit() { - // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant 4 : i16 - // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant 5.0 : f16 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi32> - %2 = arith.constant dense<[2, 3]> : vector<2xi16> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf32> : !spirv.array<5 x f32> - %3 = arith.constant dense<4.0> : tensor<5xf16> - // CHECK: spirv.Constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf32> : !spirv.array<4 x f32> - %4 = arith.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf16> - return -} - -// CHECK-LABEL: @constant_64bit -func.func @constant_64bit() { - // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant 4 : i64 - // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant 5.0 : f64 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi32> - %2 = arith.constant dense<[2, 3]> : vector<2xi64> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf32> : !spirv.array<5 x f32> - %3 = arith.constant dense<4.0> : tensor<5xf64> - // CHECK: spirv.Constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf32> : !spirv.array<4 x f32> - %4 = arith.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf16> - return -} - -// CHECK-LABEL: @corner_cases -func.func @corner_cases() { - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %0 = arith.constant 4294967295 : i64 // 2^32 - 1 - // CHECK: %{{.*}} = spirv.Constant 2147483647 : i32 - %1 = arith.constant 2147483647 : i64 // 2^31 - 1 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %2 = arith.constant 2147483648 : i64 // 2^31 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %3 = arith.constant -2147483648 : i64 // -2^31 - - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %5 = arith.constant -1 : i64 - // 
CHECK: %{{.*}} = spirv.Constant -2 : i32 - %6 = arith.constant -2 : i64 - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %7 = arith.constant -1 : index - // CHECK: %{{.*}} = spirv.Constant -2 : i32 - %8 = arith.constant -2 : index - - - // CHECK: spirv.Constant false - %9 = arith.constant false - // CHECK: spirv.Constant true - %10 = arith.constant true - - return -} - -} // end module - -// ----- - //===----------------------------------------------------------------------===// // std cast ops //===----------------------------------------------------------------------===// @@ -1847,39 +1736,3 @@ func.func @fpext2(%arg0 : f32) -> f64 { } } // end module - -// ----- - -// Checks that cast types will be adjusted when missing special capabilities for -// certain non-32-bit scalar types. -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @fptrunc1 -// CHECK-SAME: %[[A:.*]]: f64 -func.func @fptrunc1(%arg0 : f64) -> f16 { - // CHECK: %[[ARG:.+]] = builtin.unrealized_conversion_cast %[[A]] : f64 to f32 - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 - %0 = arith.truncf %arg0 : f64 to f16 - return %0: f16 -} - -// CHECK-LABEL: @fptrunc2 -// CHECK-SAME: %[[ARG:.*]]: f32 -func.func @fptrunc2(%arg0: f32) -> f16 { - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 - %0 = arith.truncf %arg0 : f32 to f16 - return %0: f16 -} - -// CHECK-LABEL: @sitofp -func.func @sitofp(%arg0 : i64) -> f64 { - // CHECK: spirv.ConvertSToF %{{.*}} : i32 to f32 - %0 = arith.sitofp %arg0 : i64 to f64 - return %0: f64 -} - -} // end module - -// ----- diff --git a/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir b/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir index 4f1cd09efec30..d207ecd71c3cb 100644 --- a/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir +++ b/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt -split-input-file -convert-func-to-spirv %s -o - | FileCheck %s 
-// RUN: mlir-opt -split-input-file -convert-func-to-spirv="emulate-non-32-bit-scalar-types=false" %s -o - | FileCheck %s --check-prefix=NOEMU +// RUN: mlir-opt -split-input-file -convert-func-to-spirv="emulate-lt-32-bit-scalar-types=false" %s | \ +// RUN: FileCheck %s --check-prefix=NOEMU //===----------------------------------------------------------------------===// // Integer types @@ -15,7 +16,7 @@ module attributes { // CHECK-SAME: i32 // CHECK-SAME: si32 // CHECK-SAME: ui32 -// NOEMU-LABEL: func @integer8 +// NOEMU-LABEL: func.func @integer8 // NOEMU-SAME: i8 // NOEMU-SAME: si8 // NOEMU-SAME: ui8 @@ -25,16 +26,17 @@ func.func @integer8(%arg0: i8, %arg1: si8, %arg2: ui8) { return } // CHECK-SAME: i32 // CHECK-SAME: si32 // CHECK-SAME: ui32 -// NOEMU-LABEL: func @integer16 +// NOEMU-LABEL: func.func @integer16 // NOEMU-SAME: i16 // NOEMU-SAME: si16 // NOEMU-SAME: ui16 func.func @integer16(%arg0: i16, %arg1: si16, %arg2: ui16) { return } -// CHECK-LABEL: spirv.func @integer64 -// CHECK-SAME: i32 -// CHECK-SAME: si32 -// CHECK-SAME: ui32 +// We do not truncate 64-bit types to 32-bit ones. 
+// CHECK-LABEL: func.func @integer64 +// CHECK-SAME: i64 +// CHECK-SAME: si64 +// CHECK-SAME: ui64 // NOEMU-LABEL: func @integer64 // NOEMU-SAME: i64 // NOEMU-SAME: si64 @@ -131,13 +133,13 @@ module attributes { // CHECK-LABEL: spirv.func @float16 // CHECK-SAME: f32 -// NOEMU-LABEL: func @float16 +// NOEMU-LABEL: func.func @float16 // NOEMU-SAME: f16 func.func @float16(%arg0: f16) { return } -// CHECK-LABEL: spirv.func @float64 -// CHECK-SAME: f32 -// NOEMU-LABEL: func @float64 +// CHECK-LABEL: func.func @float64 +// CHECK-SAME: f64 +// NOEMU-LABEL: func.func @float64 // NOEMU-SAME: f64 func.func @float64(%arg0: f64) { return } @@ -184,7 +186,7 @@ func.func @bf16_type(%arg0: bf16) { return } //===----------------------------------------------------------------------===// // Check that capabilities for scalar types affects vector types too: no special -// capabilities available means using turning element types to 32-bit. +// capabilities available means widening element types to 32-bit. 
module attributes { spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> } { @@ -192,19 +194,15 @@ module attributes { // CHECK-LABEL: spirv.func @int_vector // CHECK-SAME: vector<2xi32> // CHECK-SAME: vector<3xsi32> -// CHECK-SAME: vector<4xui32> func.func @int_vector( %arg0: vector<2xi8>, - %arg1: vector<3xsi16>, - %arg2: vector<4xui64> + %arg1: vector<3xsi16> ) { return } // CHECK-LABEL: spirv.func @float_vector // CHECK-SAME: vector<2xf32> -// CHECK-SAME: vector<3xf32> func.func @float_vector( - %arg0: vector<2xf16>, - %arg1: vector<3xf64> + %arg0: vector<2xf16> ) { return } // CHECK-LABEL: spirv.func @one_element_vector @@ -389,33 +387,35 @@ func.func @memref_16bit_Input(%arg3: memref<16xf16, #spirv.storage_class> // NOEMU-SAME: memref<16xf16, #spirv.storage_class> func.func @memref_16bit_Output(%arg4: memref<16xf16, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_StorageBuffer -// CHECK-SAME: !spirv.ptr [0])>, StorageBuffer> -// NOEMU-LABEL: func @memref_64bit_StorageBuffer +// We do not truncate i64 to i32. 
+ +// CHECK-LABEL: func.func @memref_64bit_StorageBuffer +// CHECK-SAME: memref<16xi64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_StorageBuffer // NOEMU-SAME: memref<16xi64, #spirv.storage_class> func.func @memref_64bit_StorageBuffer(%arg0: memref<16xi64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_Uniform -// CHECK-SAME: !spirv.ptr [0])>, Uniform> -// NOEMU-LABEL: func @memref_64bit_Uniform +// CHECK-LABEL: func.func @memref_64bit_Uniform +// CHECK-SAME: memref<16xsi64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_Uniform // NOEMU-SAME: memref<16xsi64, #spirv.storage_class> func.func @memref_64bit_Uniform(%arg0: memref<16xsi64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_PushConstant -// CHECK-SAME: !spirv.ptr [0])>, PushConstant> -// NOEMU-LABEL: func @memref_64bit_PushConstant +// CHECK-LABEL: func.func @memref_64bit_PushConstant +// CHECK-SAME: memref<16xui64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_PushConstant // NOEMU-SAME: memref<16xui64, #spirv.storage_class> func.func @memref_64bit_PushConstant(%arg0: memref<16xui64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_Input -// CHECK-SAME: !spirv.ptr)>, Input> -// NOEMU-LABEL: func @memref_64bit_Input +// CHECK-LABEL: func.func @memref_64bit_Input +// CHECK-SAME: memref<16xf64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_Input // NOEMU-SAME: memref<16xf64, #spirv.storage_class> func.func @memref_64bit_Input(%arg3: memref<16xf64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_Output -// CHECK-SAME: !spirv.ptr)>, Output> -// NOEMU-LABEL: func @memref_64bit_Output +// CHECK-LABEL: func.func @memref_64bit_Output +// CHECK-SAME: memref<16xf64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_Output // NOEMU-SAME: memref<16xf64, #spirv.storage_class> func.func @memref_64bit_Output(%arg4: memref<16xf64, 
#spirv.storage_class>) { return } @@ -791,9 +791,7 @@ module attributes { // CHECK-SAME: !spirv.array<32 x i32> // CHECK-SAME: !spirv.array<32 x i32> // CHECK-SAME: !spirv.array<32 x i32> -// CHECK-SAME: !spirv.array<32 x i32> func.func @int_tensor_types( - %arg0: tensor<8x4xi64>, %arg1: tensor<8x4xi32>, %arg2: tensor<8x4xi16>, %arg3: tensor<8x4xi8> @@ -802,9 +800,7 @@ func.func @int_tensor_types( // CHECK-LABEL: spirv.func @float_tensor_types // CHECK-SAME: !spirv.array<32 x f32> // CHECK-SAME: !spirv.array<32 x f32> -// CHECK-SAME: !spirv.array<32 x f32> func.func @float_tensor_types( - %arg0: tensor<8x4xf64>, %arg1: tensor<8x4xf32>, %arg2: tensor<8x4xf16> ) { return } From 6edb49eee2fd05c35642110f35766584428857b7 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Fri, 4 Nov 2022 15:10:58 -0400 Subject: [PATCH 275/516] [mlir][arith][spirv] Remove duplicate test cases Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D137166 --- .../ArithToSPIRV/arith-to-spirv.mlir | 574 +----------------- 1 file changed, 11 insertions(+), 563 deletions(-) diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir index d561cd2c26f29..2f7fb592c896a 100644 --- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir +++ b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir @@ -282,6 +282,15 @@ func.func @cmpf(%arg0 : f32, %arg1 : f32) { return } +// CHECK-LABEL: @vec1cmpf +func.func @vec1cmpf(%arg0 : vector<1xf32>, %arg1 : vector<1xf32>) { + // CHECK: spirv.FOrdGreaterThan + %0 = arith.cmpf ogt, %arg0, %arg1 : vector<1xf32> + // CHECK: spirv.FUnordLessThan + %1 = arith.cmpf ult, %arg0, %arg1 : vector<1xf32> + return +} + } // end module // ----- @@ -466,9 +475,9 @@ func.func @constant() { // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> %8 = arith.constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 
4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %9 = arith.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32> + %9 = arith.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32> // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %10 = arith.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> + %10 = arith.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> return } @@ -1175,564 +1184,3 @@ func.func @float_scalar(%arg0: f16) { } } // end module - -// ----- - -//===----------------------------------------------------------------------===// -// std bit ops -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @bitwise_scalar -func.func @bitwise_scalar(%arg0 : i32, %arg1 : i32) { - // CHECK: spirv.BitwiseAnd - %0 = arith.andi %arg0, %arg1 : i32 - // CHECK: spirv.BitwiseOr - %1 = arith.ori %arg0, %arg1 : i32 - // CHECK: spirv.BitwiseXor - %2 = arith.xori %arg0, %arg1 : i32 - return -} - -// CHECK-LABEL: @bitwise_vector -func.func @bitwise_vector(%arg0 : vector<4xi32>, %arg1 : vector<4xi32>) { - // CHECK: spirv.BitwiseAnd - %0 = arith.andi %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.BitwiseOr - %1 = arith.ori %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.BitwiseXor - %2 = arith.xori %arg0, %arg1 : vector<4xi32> - return -} - -// CHECK-LABEL: @logical_scalar -func.func @logical_scalar(%arg0 : i1, %arg1 : i1) { - // CHECK: spirv.LogicalAnd - %0 = arith.andi %arg0, %arg1 : i1 - // CHECK: spirv.LogicalOr - %1 = arith.ori %arg0, %arg1 : i1 - // CHECK: spirv.LogicalNotEqual - %2 = arith.xori %arg0, %arg1 : i1 - return -} - -// CHECK-LABEL: @logical_vector -func.func @logical_vector(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>) { - // CHECK: spirv.LogicalAnd - %0 = arith.andi %arg0, %arg1 : vector<4xi1> - // CHECK: spirv.LogicalOr - %1 = arith.ori %arg0, %arg1 : 
vector<4xi1> - // CHECK: spirv.LogicalNotEqual - %2 = arith.xori %arg0, %arg1 : vector<4xi1> - return -} - -// CHECK-LABEL: @shift_scalar -func.func @shift_scalar(%arg0 : i32, %arg1 : i32) { - // CHECK: spirv.ShiftLeftLogical - %0 = arith.shli %arg0, %arg1 : i32 - // CHECK: spirv.ShiftRightArithmetic - %1 = arith.shrsi %arg0, %arg1 : i32 - // CHECK: spirv.ShiftRightLogical - %2 = arith.shrui %arg0, %arg1 : i32 - return -} - -// CHECK-LABEL: @shift_vector -func.func @shift_vector(%arg0 : vector<4xi32>, %arg1 : vector<4xi32>) { - // CHECK: spirv.ShiftLeftLogical - %0 = arith.shli %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.ShiftRightArithmetic - %1 = arith.shrsi %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.ShiftRightLogical - %2 = arith.shrui %arg0, %arg1 : vector<4xi32> - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// arith.cmpf -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpf -func.func @cmpf(%arg0 : f32, %arg1 : f32) { - // CHECK: spirv.FOrdEqual - %1 = arith.cmpf oeq, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdGreaterThan - %2 = arith.cmpf ogt, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdGreaterThanEqual - %3 = arith.cmpf oge, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdLessThan - %4 = arith.cmpf olt, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdLessThanEqual - %5 = arith.cmpf ole, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdNotEqual - %6 = arith.cmpf one, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordEqual - %7 = arith.cmpf ueq, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordGreaterThan - %8 = arith.cmpf ugt, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordGreaterThanEqual - %9 = arith.cmpf uge, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordLessThan - %10 = arith.cmpf ult, %arg0, %arg1 : f32 - // CHECK: FUnordLessThanEqual - %11 = arith.cmpf ule, 
%arg0, %arg1 : f32 - // CHECK: spirv.FUnordNotEqual - %12 = arith.cmpf une, %arg0, %arg1 : f32 - return -} - -// CHECK-LABEL: @vec1cmpf -func.func @vec1cmpf(%arg0 : vector<1xf32>, %arg1 : vector<1xf32>) { - // CHECK: spirv.FOrdGreaterThan - %0 = arith.cmpf ogt, %arg0, %arg1 : vector<1xf32> - // CHECK: spirv.FUnordLessThan - %1 = arith.cmpf ult, %arg0, %arg1 : vector<1xf32> - return -} - -} // end module - -// ----- - -// With Kernel capability, we can convert NaN check to spirv.Ordered/spirv.Unordered. -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpf -func.func @cmpf(%arg0 : f32, %arg1 : f32) { - // CHECK: spirv.Ordered - %0 = arith.cmpf ord, %arg0, %arg1 : f32 - // CHECK: spirv.Unordered - %1 = arith.cmpf uno, %arg0, %arg1 : f32 - return -} - -} // end module - -// ----- - -// Without Kernel capability, we need to convert NaN check to spirv.IsNan. -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpf -// CHECK-SAME: %[[LHS:.+]]: f32, %[[RHS:.+]]: f32 -func.func @cmpf(%arg0 : f32, %arg1 : f32) { - // CHECK: %[[LHS_NAN:.+]] = spirv.IsNan %[[LHS]] : f32 - // CHECK-NEXT: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : f32 - // CHECK-NEXT: %[[OR:.+]] = spirv.LogicalOr %[[LHS_NAN]], %[[RHS_NAN]] : i1 - // CHECK-NEXT: %{{.+}} = spirv.LogicalNot %[[OR]] : i1 - %0 = arith.cmpf ord, %arg0, %arg1 : f32 - - // CHECK-NEXT: %[[LHS_NAN:.+]] = spirv.IsNan %[[LHS]] : f32 - // CHECK-NEXT: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : f32 - // CHECK-NEXT: %{{.+}} = spirv.LogicalOr %[[LHS_NAN]], %[[RHS_NAN]] : i1 - %1 = arith.cmpf uno, %arg0, %arg1 : f32 - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// arith.cmpi -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = 
#spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpi -func.func @cmpi(%arg0 : i32, %arg1 : i32) { - // CHECK: spirv.IEqual - %0 = arith.cmpi eq, %arg0, %arg1 : i32 - // CHECK: spirv.INotEqual - %1 = arith.cmpi ne, %arg0, %arg1 : i32 - // CHECK: spirv.SLessThan - %2 = arith.cmpi slt, %arg0, %arg1 : i32 - // CHECK: spirv.SLessThanEqual - %3 = arith.cmpi sle, %arg0, %arg1 : i32 - // CHECK: spirv.SGreaterThan - %4 = arith.cmpi sgt, %arg0, %arg1 : i32 - // CHECK: spirv.SGreaterThanEqual - %5 = arith.cmpi sge, %arg0, %arg1 : i32 - // CHECK: spirv.ULessThan - %6 = arith.cmpi ult, %arg0, %arg1 : i32 - // CHECK: spirv.ULessThanEqual - %7 = arith.cmpi ule, %arg0, %arg1 : i32 - // CHECK: spirv.UGreaterThan - %8 = arith.cmpi ugt, %arg0, %arg1 : i32 - // CHECK: spirv.UGreaterThanEqual - %9 = arith.cmpi uge, %arg0, %arg1 : i32 - return -} - -// CHECK-LABEL: @boolcmpi -func.func @boolcmpi(%arg0 : i1, %arg1 : i1) { - // CHECK: spirv.LogicalEqual - %0 = arith.cmpi eq, %arg0, %arg1 : i1 - // CHECK: spirv.LogicalNotEqual - %1 = arith.cmpi ne, %arg0, %arg1 : i1 - return -} - -// CHECK-LABEL: @vecboolcmpi -func.func @vecboolcmpi(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>) { - // CHECK: spirv.LogicalEqual - %0 = arith.cmpi eq, %arg0, %arg1 : vector<4xi1> - // CHECK: spirv.LogicalNotEqual - %1 = arith.cmpi ne, %arg0, %arg1 : vector<4xi1> - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// arith.constant -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env< - #spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @constant -func.func @constant() { - // CHECK: spirv.Constant true - %0 = arith.constant true - // CHECK: spirv.Constant 42 : i32 - %1 = arith.constant 42 : i32 - // CHECK: spirv.Constant 5.000000e-01 : f32 - %2 = arith.constant 0.5 : f32 - // CHECK: spirv.Constant 
dense<[2, 3]> : vector<2xi32> - %3 = arith.constant dense<[2, 3]> : vector<2xi32> - // CHECK: spirv.Constant 1 : i32 - %4 = arith.constant 1 : index - // CHECK: spirv.Constant dense<1> : tensor<6xi32> : !spirv.array<6 x i32> - %5 = arith.constant dense<1> : tensor<2x3xi32> - // CHECK: spirv.Constant dense<1.000000e+00> : tensor<6xf32> : !spirv.array<6 x f32> - %6 = arith.constant dense<1.0> : tensor<2x3xf32> - // CHECK: spirv.Constant dense<{{\[}}1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00, 6.000000e+00]> : tensor<6xf32> : !spirv.array<6 x f32> - %7 = arith.constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf32> - // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %8 = arith.constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> - // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %9 = arith.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32> - // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %10 = arith.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> - return -} - -// CHECK-LABEL: @constant_16bit -func.func @constant_16bit() { - // CHECK: spirv.Constant 4 : i16 - %0 = arith.constant 4 : i16 - // CHECK: spirv.Constant 5.000000e+00 : f16 - %1 = arith.constant 5.0 : f16 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi16> - %2 = arith.constant dense<[2, 3]> : vector<2xi16> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf16> : !spirv.array<5 x f16> - %3 = arith.constant dense<4.0> : tensor<5xf16> - return -} - -// CHECK-LABEL: @constant_64bit -func.func @constant_64bit() { - // CHECK: spirv.Constant 4 : i64 - %0 = arith.constant 4 : i64 - // CHECK: spirv.Constant 5.000000e+00 : f64 - %1 = arith.constant 5.0 : f64 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi64> - %2 = arith.constant dense<[2, 3]> : vector<2xi64> - // CHECK: spirv.Constant 
dense<4.000000e+00> : tensor<5xf64> : !spirv.array<5 x f64> - %3 = arith.constant dense<4.0> : tensor<5xf64> - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// std cast ops -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env< - #spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: index_cast1 -func.func @index_cast1(%arg0: i16) { - // CHECK: spirv.SConvert %{{.+}} : i16 to i32 - %0 = arith.index_cast %arg0 : i16 to index - return -} - -// CHECK-LABEL: index_cast2 -func.func @index_cast2(%arg0: index) { - // CHECK: spirv.SConvert %{{.+}} : i32 to i16 - %0 = arith.index_cast %arg0 : index to i16 - return -} - -// CHECK-LABEL: index_cast3 -func.func @index_cast3(%arg0: i32) { - // CHECK-NOT: spirv.SConvert - %0 = arith.index_cast %arg0 : i32 to index - return -} - -// CHECK-LABEL: index_cast4 -func.func @index_cast4(%arg0: index) { - // CHECK-NOT: spirv.SConvert - %0 = arith.index_cast %arg0 : index to i32 - return -} - -// CHECK-LABEL: @fpext1 -func.func @fpext1(%arg0: f16) -> f64 { - // CHECK: spirv.FConvert %{{.*}} : f16 to f64 - %0 = arith.extf %arg0 : f16 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @fpext2 -func.func @fpext2(%arg0 : f32) -> f64 { - // CHECK: spirv.FConvert %{{.*}} : f32 to f64 - %0 = arith.extf %arg0 : f32 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @fptrunc1 -func.func @fptrunc1(%arg0 : f64) -> f16 { - // CHECK: spirv.FConvert %{{.*}} : f64 to f16 - %0 = arith.truncf %arg0 : f64 to f16 - return %0 : f16 -} - -// CHECK-LABEL: @fptrunc2 -func.func @fptrunc2(%arg0: f32) -> f16 { - // CHECK: spirv.FConvert %{{.*}} : f32 to f16 - %0 = arith.truncf %arg0 : f32 to f16 - return %0 : f16 -} - -// CHECK-LABEL: @sitofp1 -func.func @sitofp1(%arg0 : i32) -> f32 { - // CHECK: spirv.ConvertSToF %{{.*}} : i32 to f32 - %0 = arith.sitofp %arg0 : i32 to f32 - return %0 : f32 -} - 
-// CHECK-LABEL: @sitofp2 -func.func @sitofp2(%arg0 : i64) -> f64 { - // CHECK: spirv.ConvertSToF %{{.*}} : i64 to f64 - %0 = arith.sitofp %arg0 : i64 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @uitofp_i16_f32 -func.func @uitofp_i16_f32(%arg0: i16) -> f32 { - // CHECK: spirv.ConvertUToF %{{.*}} : i16 to f32 - %0 = arith.uitofp %arg0 : i16 to f32 - return %0 : f32 -} - -// CHECK-LABEL: @uitofp_i32_f32 -func.func @uitofp_i32_f32(%arg0 : i32) -> f32 { - // CHECK: spirv.ConvertUToF %{{.*}} : i32 to f32 - %0 = arith.uitofp %arg0 : i32 to f32 - return %0 : f32 -} - -// CHECK-LABEL: @uitofp_i1_f32 -func.func @uitofp_i1_f32(%arg0 : i1) -> f32 { - // CHECK: %[[ZERO:.+]] = spirv.Constant 0.000000e+00 : f32 - // CHECK: %[[ONE:.+]] = spirv.Constant 1.000000e+00 : f32 - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : i1, f32 - %0 = arith.uitofp %arg0 : i1 to f32 - return %0 : f32 -} - -// CHECK-LABEL: @uitofp_i1_f64 -func.func @uitofp_i1_f64(%arg0 : i1) -> f64 { - // CHECK: %[[ZERO:.+]] = spirv.Constant 0.000000e+00 : f64 - // CHECK: %[[ONE:.+]] = spirv.Constant 1.000000e+00 : f64 - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : i1, f64 - %0 = arith.uitofp %arg0 : i1 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @uitofp_vec_i1_f32 -func.func @uitofp_vec_i1_f32(%arg0 : vector<4xi1>) -> vector<4xf32> { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0.000000e+00> : vector<4xf32> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1.000000e+00> : vector<4xf32> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, vector<4xf32> - %0 = arith.uitofp %arg0 : vector<4xi1> to vector<4xf32> - return %0 : vector<4xf32> -} - -// CHECK-LABEL: @uitofp_vec_i1_f64 -spirv.func @uitofp_vec_i1_f64(%arg0: vector<4xi1>) -> vector<4xf64> "None" { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0.000000e+00> : vector<4xf64> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1.000000e+00> : vector<4xf64> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, 
vector<4xf64> - %0 = spirv.Constant dense<0.000000e+00> : vector<4xf64> - %1 = spirv.Constant dense<1.000000e+00> : vector<4xf64> - %2 = spirv.Select %arg0, %1, %0 : vector<4xi1>, vector<4xf64> - spirv.ReturnValue %2 : vector<4xf64> -} - -// CHECK-LABEL: @sexti1 -func.func @sexti1(%arg0: i16) -> i64 { - // CHECK: spirv.SConvert %{{.*}} : i16 to i64 - %0 = arith.extsi %arg0 : i16 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @sexti2 -func.func @sexti2(%arg0 : i32) -> i64 { - // CHECK: spirv.SConvert %{{.*}} : i32 to i64 - %0 = arith.extsi %arg0 : i32 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @zexti1 -func.func @zexti1(%arg0: i16) -> i64 { - // CHECK: spirv.UConvert %{{.*}} : i16 to i64 - %0 = arith.extui %arg0 : i16 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @zexti2 -func.func @zexti2(%arg0 : i32) -> i64 { - // CHECK: spirv.UConvert %{{.*}} : i32 to i64 - %0 = arith.extui %arg0 : i32 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @zexti3 -func.func @zexti3(%arg0 : i1) -> i32 { - // CHECK: %[[ZERO:.+]] = spirv.Constant 0 : i32 - // CHECK: %[[ONE:.+]] = spirv.Constant 1 : i32 - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : i1, i32 - %0 = arith.extui %arg0 : i1 to i32 - return %0 : i32 -} - -// CHECK-LABEL: @zexti4 -func.func @zexti4(%arg0 : vector<4xi1>) -> vector<4xi32> { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0> : vector<4xi32> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1> : vector<4xi32> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, vector<4xi32> - %0 = arith.extui %arg0 : vector<4xi1> to vector<4xi32> - return %0 : vector<4xi32> -} - -// CHECK-LABEL: @zexti5 -func.func @zexti5(%arg0 : vector<4xi1>) -> vector<4xi64> { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0> : vector<4xi64> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1> : vector<4xi64> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, vector<4xi64> - %0 = arith.extui %arg0 : vector<4xi1> to vector<4xi64> - return %0 : 
vector<4xi64> -} - -// CHECK-LABEL: @trunci1 -func.func @trunci1(%arg0 : i64) -> i16 { - // CHECK: spirv.SConvert %{{.*}} : i64 to i16 - %0 = arith.trunci %arg0 : i64 to i16 - return %0 : i16 -} - -// CHECK-LABEL: @trunci2 -func.func @trunci2(%arg0: i32) -> i16 { - // CHECK: spirv.SConvert %{{.*}} : i32 to i16 - %0 = arith.trunci %arg0 : i32 to i16 - return %0 : i16 -} - -// CHECK-LABEL: @trunc_to_i1 -func.func @trunc_to_i1(%arg0: i32) -> i1 { - // CHECK: %[[MASK:.*]] = spirv.Constant 1 : i32 - // CHECK: %[[MASKED_SRC:.*]] = spirv.BitwiseAnd %{{.*}}, %[[MASK]] : i32 - // CHECK: %[[IS_ONE:.*]] = spirv.IEqual %[[MASKED_SRC]], %[[MASK]] : i32 - // CHECK-DAG: %[[TRUE:.*]] = spirv.Constant true - // CHECK-DAG: %[[FALSE:.*]] = spirv.Constant false - // CHECK: spirv.Select %[[IS_ONE]], %[[TRUE]], %[[FALSE]] : i1, i1 - %0 = arith.trunci %arg0 : i32 to i1 - return %0 : i1 -} - -// CHECK-LABEL: @trunc_to_veci1 -func.func @trunc_to_veci1(%arg0: vector<4xi32>) -> vector<4xi1> { - // CHECK: %[[MASK:.*]] = spirv.Constant dense<1> : vector<4xi32> - // CHECK: %[[MASKED_SRC:.*]] = spirv.BitwiseAnd %{{.*}}, %[[MASK]] : vector<4xi32> - // CHECK: %[[IS_ONE:.*]] = spirv.IEqual %[[MASKED_SRC]], %[[MASK]] : vector<4xi32> - // CHECK-DAG: %[[TRUE:.*]] = spirv.Constant dense : vector<4xi1> - // CHECK-DAG: %[[FALSE:.*]] = spirv.Constant dense : vector<4xi1> - // CHECK: spirv.Select %[[IS_ONE]], %[[TRUE]], %[[FALSE]] : vector<4xi1>, vector<4xi1> - %0 = arith.trunci %arg0 : vector<4xi32> to vector<4xi1> - return %0 : vector<4xi1> -} - -// CHECK-LABEL: @fptosi1 -func.func @fptosi1(%arg0 : f32) -> i32 { - // CHECK: spirv.ConvertFToS %{{.*}} : f32 to i32 - %0 = arith.fptosi %arg0 : f32 to i32 - return %0 : i32 -} - -// CHECK-LABEL: @fptosi2 -func.func @fptosi2(%arg0 : f16) -> i16 { - // CHECK: spirv.ConvertFToS %{{.*}} : f16 to i16 - %0 = arith.fptosi %arg0 : f16 to i16 - return %0 : i16 -} - -} // end module - -// ----- - -// Checks that cast types will be adjusted when missing special 
capabilities for -// certain non-32-bit scalar types. -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @fpext1 -// CHECK-SAME: %[[A:.*]]: f16 -func.func @fpext1(%arg0: f16) -> f64 { - // CHECK: %[[ARG:.+]] = builtin.unrealized_conversion_cast %[[A]] : f16 to f32 - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f64 - %0 = arith.extf %arg0 : f16 to f64 - return %0: f64 -} - -// CHECK-LABEL: @fpext2 -// CHECK-SAME: %[[ARG:.*]]: f32 -func.func @fpext2(%arg0 : f32) -> f64 { - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f64 - %0 = arith.extf %arg0 : f32 to f64 - return %0: f64 -} - -} // end module From 8c33639aa65d0771472abb28718f178d0ffa923d Mon Sep 17 00:00:00 2001 From: Hanhan Wang Date: Wed, 2 Nov 2022 18:09:46 -0700 Subject: [PATCH 276/516] [mlir] Move the assertion to a valid place. The defining Op may live in an unlinked block so its parent Op may be null. Only assert it when the parent Op is not null. Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D137306 --- mlir/lib/Analysis/SliceAnalysis.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp index e343c47bb0f01..4684ccfa21b8a 100644 --- a/mlir/lib/Analysis/SliceAnalysis.cpp +++ b/mlir/lib/Analysis/SliceAnalysis.cpp @@ -98,10 +98,11 @@ static void getBackwardSliceImpl(Operation *op, // TODO: determine whether we want to recurse backward into the other // blocks of parentOp, which are not technically backward unless they flow // into us. For now, just bail. 
- assert(parentOp->getNumRegions() == 1 && - parentOp->getRegion(0).getBlocks().size() == 1); - if (backwardSlice->count(parentOp) == 0) + if (parentOp && backwardSlice->count(parentOp) == 0) { + assert(parentOp->getNumRegions() == 1 && + parentOp->getRegion(0).getBlocks().size() == 1); getBackwardSliceImpl(parentOp, backwardSlice, filter); + } } else { llvm_unreachable("No definingOp and not a block argument."); } From c954cfeb57a1c8c0996a34da64243bc7f7fe1107 Mon Sep 17 00:00:00 2001 From: Mike Rice Date: Thu, 3 Nov 2022 08:50:08 -0700 Subject: [PATCH 277/516] Some uses of the preprocessor can result in multiple target regions on the same line. Cases such as those in the associated lit tests can now be supported. This adds a 'Count' field to TargetRegionEntryInfo to differentiate regions with the same source position. The OffloadEntriesInfoManager routines are updated to maintain a count of regions seen at a location. The registration of regions proceeds the same as before, but now the next available count is always determined and used in the offload entry. 
Fixes: https://github.com/llvm/llvm-project/issues/52707 Differential Revision: https://reviews.llvm.org/D134816 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 10 ++- clang/test/OpenMP/Inputs/multiple_regions.inc | 4 + .../test/OpenMP/multiple_regions_per_line.cpp | 73 +++++++++++++++++++ .../OpenMP/target_codegen_registration.cpp | 48 ++++++------ .../target_codegen_registration_naming.cpp | 8 +- .../target_parallel_codegen_registration.cpp | 48 ++++++------ ...t_parallel_codegen_registration_naming.cpp | 8 +- ...rget_parallel_for_codegen_registration.cpp | 48 ++++++------ ...rallel_for_codegen_registration_naming.cpp | 8 +- ...parallel_for_simd_codegen_registration.cpp | 48 ++++++------ ...l_for_simd_codegen_registration_naming.cpp | 8 +- .../target_simd_codegen_registration.cpp | 48 ++++++------ ...arget_simd_codegen_registration_naming.cpp | 8 +- .../target_teams_codegen_registration.cpp | 48 ++++++------ ...rget_teams_codegen_registration_naming.cpp | 8 +- ..._teams_distribute_codegen_registration.cpp | 48 ++++++------ ...distribute_codegen_registration_naming.cpp | 8 +- ...parallel_for_simd_codegen_registration.cpp | 48 ++++++------ ...l_for_simd_codegen_registration_naming.cpp | 8 +- ...s_distribute_simd_codegen_registration.cpp | 48 ++++++------ ...ibute_simd_codegen_registration_naming.cpp | 8 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 45 +++++++++--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 53 +++++++++++--- .../Frontend/OpenMPIRBuilderTest.cpp | 4 +- 24 files changed, 414 insertions(+), 279 deletions(-) create mode 100644 clang/test/OpenMP/Inputs/multiple_regions.inc create mode 100644 clang/test/OpenMP/multiple_regions_per_line.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 6b0908d139f47..e52989b7c139b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1858,7 +1858,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, auto 
EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName()); SmallString<128> Buffer, Out; - EntryInfo.getTargetRegionEntryFnName(Buffer); + OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { @@ -6101,18 +6101,20 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( // Create a unique name for the entry function using the source location // information of the current target region. The name will be something like: // - // __omp_offloading_DD_FFFF_PP_lBB + // __omp_offloading_DD_FFFF_PP_lBB[_CC] // // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the // mangled name of the function that encloses the target region and BB is the - // line number of the target region. + // line number of the target region. CC is a count added when more than one + // region is located at the same location. const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || !CGM.getLangOpts().OpenMPOffloadMandatory; auto EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName); + SmallString<64> EntryFnName; - EntryInfo.getTargetRegionEntryFnName(EntryFnName); + OffloadEntriesInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); diff --git a/clang/test/OpenMP/Inputs/multiple_regions.inc b/clang/test/OpenMP/Inputs/multiple_regions.inc new file mode 100644 index 0000000000000..f519bee6c5b5e --- /dev/null +++ b/clang/test/OpenMP/Inputs/multiple_regions.inc @@ -0,0 +1,4 @@ +#pragma omp target +{ + i = i + VALUE; +} diff --git a/clang/test/OpenMP/multiple_regions_per_line.cpp b/clang/test/OpenMP/multiple_regions_per_line.cpp new file mode 100644 index 0000000000000..4332ca052edce --- /dev/null +++ b/clang/test/OpenMP/multiple_regions_per_line.cpp @@ -0,0 +1,73 @@ +//RUN: %clang_cc1 -verify -x c++ -triple x86_64 -fopenmp -fopenmp-version=51 \ 
+//RUN: -fopenmp-targets=x86_64 -I%S/Inputs -emit-llvm -o - %s | FileCheck %s + +//RUN: %clang_cc1 -x c++ -triple x86_64 -fopenmp -fopenmp-version=51 \ +//RUN: -fopenmp-targets=x86_64 -I%S/Inputs -emit-llvm-bc -o %t-host.bc %s + +//RUN: %clang_cc1 -x c++ -triple x86_64 -fopenmp -fopenmp-version=51 \ +//RUN: -fopenmp-targets=x86_64 -I%S/Inputs -fopenmp-is-device \ +//RUN: -fopenmp-host-ir-file-path %t-host.bc -emit-llvm -o - %s \ +//RUN: | FileCheck %s --check-prefix=TCHECK + +// expected-no-diagnostics + +//CHECK: define {{.*}}void @[[FOO:.+]]( +void foo() { + int i = 0; + +//CHECK: call void @__omp_offloading_[[FILEID1:[0-9a-f]+_[0-9a-f]+]]_[[FOO]]_l[[T1L:[0-9]+]]( + +#define VALUE 1 +#include "multiple_regions.inc" + +//CHECK: call void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_1( +#undef VALUE +#define VALUE 2 +#include "multiple_regions.inc" + +//CHECK: call void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_2( +#undef VALUE +#define VALUE 3 +#include "multiple_regions.inc" +} + +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]( +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_1( +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_2( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID1:[0-9a-f]+_[0-9a-f]+]]_[[FOO:.+]]_l[[T1L:[0-9]+]]( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_1( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_2( + +#define A()\ +_Pragma("omp target")\ +{}\ +_Pragma("omp target")\ +{} + +//CHECK: define {{.*}}void @[[BAR:.+]]( +void bar() +{ +//CHECK: call void @__omp_offloading_[[FILEID2:[0-9a-f]+_[0-9a-f]+]]_[[BAR]]_l[[T2L:[0-9]+]]( +//CHECK: call void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]_1( + A() +} + +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]( +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]_1( +//TCHECK: define {{.*}}void 
@__omp_offloading_[[FILEID2:[0-9a-f]+_[0-9a-f]+]]_[[BAR:.+]]_l[[T2L:[0-9]+]]( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]_1( + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 1, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 2, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 1, i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 1, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 2, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 1, i32 {{[0-9]+}}} diff --git a/clang/test/OpenMP/target_codegen_registration.cpp b/clang/test/OpenMP/target_codegen_registration.cpp index cd0d82e223c3f..a82af4dcb02dd 100644 --- a/clang/test/OpenMP/target_codegen_registration.cpp +++ b/clang/test/OpenMP/target_codegen_registration.cpp @@ -403,31 +403,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, 
!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 398, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 294, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 
[[FILEID]], !"_Z3bari", i32 398, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 398, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 294, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 
[[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 398, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_codegen_registration_naming.cpp b/clang/test/OpenMP/target_codegen_registration_naming.cpp index 8bfabfdebb295..5814c37363ae3 100644 --- a/clang/test/OpenMP/target_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_codegen_registration_naming.cpp @@ -75,10 +75,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, 
!"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_codegen_registration.cpp b/clang/test/OpenMP/target_parallel_codegen_registration.cpp index 568d215e85336..27149109c8327 100644 --- a/clang/test/OpenMP/target_parallel_codegen_registration.cpp +++ b/clang/test/OpenMP/target_parallel_codegen_registration.cpp @@ -442,31 +442,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 
[[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, 
!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, 
i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp b/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp index b3fbf83625e96..9701478bba089 100644 --- a/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp @@ -75,10 +75,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = 
!{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp b/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp index f76aaf15d7455..6aefa86d3e9dd 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 
[[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 
0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], 
!"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp b/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp index 2afd858735a0f..a809cb1263402 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp index 4c3c15425b4a3..ff22cfa8b9a8b 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, 
!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// 
CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// 
TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp index ba2d363dc455a..1eff0c1b0e65d 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 
{{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_simd_codegen_registration.cpp b/clang/test/OpenMP/target_simd_codegen_registration.cpp index 0770d0d314c0c..d41dae2c39769 100644 --- a/clang/test/OpenMP/target_simd_codegen_registration.cpp +++ b/clang/test/OpenMP/target_simd_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 
[[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 
{{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 
{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp index ef8676a7b9829..a87042fb1fe70 100644 --- a/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, 
i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_codegen_registration.cpp b/clang/test/OpenMP/target_teams_codegen_registration.cpp index d79553e80ea64..70288706cff07 100644 --- a/clang/test/OpenMP/target_teams_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_codegen_registration.cpp @@ -442,31 +442,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// CHECK-DAG: 
= !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, 
i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, 
i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp index daf4d158a91a7..35975abdad4ac 100644 --- a/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp @@ -75,10 +75,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp b/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp index 3913e43fef8ce..6cf8f4ec79f22 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, 
!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// 
CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// 
TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp index 44c0f25aa9b12..d5312517f3397 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, 
!"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp index 4721dbdc4cbd2..1bba9942bbca7 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 
[[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, 
i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} 
+// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp index 3a6473a1115f2..1e52f5441f8d6 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 
{{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp index 9181c5c23892b..cd6cf08ce455f 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp @@ -452,32 +452,32 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 
[[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 
{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], 
!"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} // CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp index 0421345dab9fb..52783ef7bbaed 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp @@ -77,12 +77,12 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // CHECK-DAG: 
!{!"llvm.loop.vectorize.enable", i1 true} // TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 27d84bcb9f40a..c09306dcde086 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1735,23 +1735,24 @@ struct TargetRegionEntryInfo { unsigned DeviceID; unsigned FileID; unsigned Line; + unsigned Count; - TargetRegionEntryInfo() : ParentName(""), DeviceID(0), FileID(0), Line(0) {} + TargetRegionEntryInfo() + : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, - unsigned FileID, unsigned Line) - : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line) { - } + unsigned FileID, unsigned Line, unsigned Count = 0) + : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), + Count(Count) {} static void getTargetRegionEntryFnName(SmallVectorImpl &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, - unsigned Line); - - void getTargetRegionEntryFnName(SmallVectorImpl &Name); + unsigned Line, unsigned Count); bool operator<(const TargetRegionEntryInfo RHS) const { - return std::make_tuple(ParentName, DeviceID, FileID, Line) < - std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line); + return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < + std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, + RHS.Count); } }; @@ -1859,14 +1860,19 @@ class OffloadEntriesInfoManager { void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order); /// Register target region entry. 
- void registerTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, + void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags, bool IsDevice); /// Return true if a target region entry with the provided information /// exists. - bool hasTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, + bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId = false) const; + + // Return the Name based on \a EntryInfo using the next available Count. + void getTargetRegionEntryFnName(SmallVectorImpl &Name, + const TargetRegionEntryInfo &EntryInfo); + /// brief Applies action \a Action on all registered entries. typedef function_ref @@ -1939,6 +1945,23 @@ class OffloadEntriesInfoManager { const OffloadDeviceGlobalVarEntryInfoActTy &Action); private: + /// Return the count of entries at a particular source location. + unsigned + getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; + + /// Update the count of entries at a particular source location. + void + incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); + + static TargetRegionEntryInfo + getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { + return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, + EntryInfo.FileID, EntryInfo.Line, 0); + } + + // Count of entries at a location. + std::map OffloadEntriesTargetRegionCount; + // Storage for target region entries kind. typedef std::map OffloadEntriesTargetRegionTy; diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 96ea0eb21882b..0a0e6a9386e58 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4759,12 +4759,14 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( // - Entry 3 -> Mangled name of the function where the entry was // identified. 
// - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Order the entry was created. + // - Entry 5 -> Count of regions at this DeviceID/FilesID/Line. + // - Entry 6 -> Order the entry was created. // The first element of the metadata node is the kind. Metadata *Ops[] = { GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID), GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName), - GetMDInt(EntryInfo.Line), GetMDInt(E.getOrder())}; + GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count), + GetMDInt(E.getOrder())}; // Save this entry in the right position of the ordered entries array. OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo); @@ -4869,15 +4871,20 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( void TargetRegionEntryInfo::getTargetRegionEntryFnName( SmallVectorImpl &Name, StringRef ParentName, unsigned DeviceID, - unsigned FileID, unsigned Line) { + unsigned FileID, unsigned Line, unsigned Count) { raw_svector_ostream OS(Name); OS << "__omp_offloading" << llvm::format("_%x", DeviceID) << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; + if (Count) + OS << "_" << Count; } -void TargetRegionEntryInfo::getTargetRegionEntryFnName( - SmallVectorImpl &Name) { - getTargetRegionEntryFnName(Name, ParentName, DeviceID, FileID, Line); +void OffloadEntriesInfoManager::getTargetRegionEntryFnName( + SmallVectorImpl &Name, const TargetRegionEntryInfo &EntryInfo) { + unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo); + TargetRegionEntryInfo::getTargetRegionEntryFnName( + Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID, + EntryInfo.Line, NewCount); } /// Loads all the offload entries information from the host IR @@ -4911,9 +4918,10 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata( TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3), /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), - /*Line=*/GetMDInt(4)); + /*Line=*/GetMDInt(4), + /*Count=*/GetMDInt(5)); 
OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - EntryInfo, /*Order=*/GetMDInt(5)); + EntryInfo, /*Order=*/GetMDInt(6)); break; } case OffloadEntriesInfoManager::OffloadEntryInfo:: @@ -4933,6 +4941,21 @@ bool OffloadEntriesInfoManager::empty() const { OffloadEntriesDeviceGlobalVar.empty(); } +unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount( + const TargetRegionEntryInfo &EntryInfo) const { + auto It = OffloadEntriesTargetRegionCount.find( + getTargetRegionEntryCountKey(EntryInfo)); + if (It == OffloadEntriesTargetRegionCount.end()) + return 0; + return It->second; +} + +void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount( + const TargetRegionEntryInfo &EntryInfo) { + OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] = + EntryInfo.Count + 1; +} + /// Initialize target region entry. void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo( const TargetRegionEntryInfo &EntryInfo, unsigned Order) { @@ -4943,8 +4966,13 @@ void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo( } void OffloadEntriesInfoManager::registerTargetRegionEntryInfo( - const TargetRegionEntryInfo &EntryInfo, Constant *Addr, Constant *ID, + TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags, bool IsDevice) { + assert(EntryInfo.Count == 0 && "expected default EntryInfo"); + + // Update the EntryInfo with the next available count for this location. + EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo); + // If we are emitting code for a target, the entry is already initialized, // only has to be registered. 
if (IsDevice) { @@ -4966,10 +4994,15 @@ void OffloadEntriesInfoManager::registerTargetRegionEntryInfo( OffloadEntriesTargetRegion[EntryInfo] = Entry; ++OffloadingEntriesNum; } + incrementTargetRegionEntryInfoCount(EntryInfo); } bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo( - const TargetRegionEntryInfo &EntryInfo, bool IgnoreAddressId) const { + TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId) const { + + // Update the EntryInfo with the next available count for this location. + EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo); + auto It = OffloadEntriesTargetRegion.find(EntryInfo); if (It == OffloadEntriesTargetRegion.end()) { return false; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index b334755c35c8a..36c70ee1f54bd 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5504,8 +5504,9 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { OffloadEntriesInfoManager InfoManager; - TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4); + TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0); InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); + EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo, true)); InfoManager.initializeDeviceGlobalVarEntryInfo( "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0); InfoManager.registerTargetRegionEntryInfo( @@ -5514,7 +5515,6 @@ TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { InfoManager.registerDeviceGlobalVarEntryInfo( "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, GlobalValue::WeakAnyLinkage, true); - EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo, true)); EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar")); } } // namespace From 687ce3dec132e0a8d37da910d4dd470b72c70298 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Fri, 4 
Nov 2022 13:08:31 -0700 Subject: [PATCH 278/516] [BOLT][Hugify] Fix apple builds Fix placement of ifdefs in hugify.cpp after D129107 landed. --- bolt/runtime/hugify.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index d8a2d8b45f6cb..05c1be4f2d70c 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -6,8 +6,7 @@ // //===---------------------------------------------------------------------===// -#if defined (__x86_64__) -#if !defined(__APPLE__) +#if defined (__x86_64__) && !defined(__APPLE__) #include "common.h" @@ -136,7 +135,6 @@ static void hugifyForOldKernel(uint8_t *From, uint8_t *To) { __munmap(Mem, Size); } -#endif extern "C" void __bolt_hugify_self_impl() { uint8_t *HotStart = (uint8_t *)&__hot_start; From 369899449226a00f15422a12aacb62dee312c8d5 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Fri, 28 Oct 2022 18:34:48 -0700 Subject: [PATCH 279/516] [BOLT] Always move JTs in jump-table=move We should always move jump tables when requested. Previously, we were not moving jump tables of non-simple functions in relocation mode. That caused a bug detailed in the attached test case: in PIC jump tables, we force jump tables to be moved, but if they are not moved because the function is not simple, we could incorrectly update original entries in .rodata, corrupting it under special circumstances (see testcase). 
Reviewed By: #bolt, maksfb Differential Revision: https://reviews.llvm.org/D137357 --- bolt/lib/Core/BinaryEmitter.cpp | 6 +- bolt/test/X86/Inputs/jt-pic-linkerscript.ld | 10 ++ bolt/test/X86/jump-table-pic-conflict.s | 132 ++++++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 bolt/test/X86/Inputs/jt-pic-linkerscript.ld create mode 100644 bolt/test/X86/jump-table-pic-conflict.s diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index c81dcc31f33ba..fe02af0b85a94 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -740,10 +740,12 @@ void BinaryEmitter::emitJumpTables(const BinaryFunction &BF) { for (auto &JTI : BF.jumpTables()) { JumpTable &JT = *JTI.second; + // Only emit shared jump tables once, when processing the first parent + if (JT.Parents.size() > 1 && JT.Parents[0] != &BF) + continue; if (opts::PrintJumpTables) JT.print(outs()); - if ((opts::JumpTables == JTS_BASIC || !BF.isSimple()) && - BC.HasRelocations) { + if (opts::JumpTables == JTS_BASIC && BC.HasRelocations) { JT.updateOriginal(); } else { MCSection *HotSection, *ColdSection; diff --git a/bolt/test/X86/Inputs/jt-pic-linkerscript.ld b/bolt/test/X86/Inputs/jt-pic-linkerscript.ld new file mode 100644 index 0000000000000..c32ffd695682c --- /dev/null +++ b/bolt/test/X86/Inputs/jt-pic-linkerscript.ld @@ -0,0 +1,10 @@ +# Linker script used by jump-table-pic-conflict.s test. +# .rodata needs to appear before .text + +SECTIONS +{ + . = 0x201120; + .rodata : { *(.rodata) } + .eh_frame : { *(.eh_frame) } + .text : { *(.text) } +} diff --git a/bolt/test/X86/jump-table-pic-conflict.s b/bolt/test/X86/jump-table-pic-conflict.s new file mode 100644 index 0000000000000..ed3c77d49b6cc --- /dev/null +++ b/bolt/test/X86/jump-table-pic-conflict.s @@ -0,0 +1,132 @@ +# Check cases when the first PIC jump table entries of one function can be +# interpreted as valid last entries of the previous function. 
+ +# Conditions to trigger the bug: Function A and B have jump tables that +# are adjacent in memory. We run in lite relocation mode. Function B +# is not disassembled because it does not have profile. Function A +# triggers a special conditional that forced BOLT to rewrite its jump +# table in-place (instead of moving it) because it is marked as +# non-simple (in this case, containing unknown control flow). The +# first entry of B's jump table (a PIC offset) happens to be a valid +# address inside A when added to A's jump table base address. In this +# case, BOLT could overwrite B's jump table, corrupting it, thinking +# the first entry of it is actually part of A's jump table. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: ld.lld %t.o -o %t.exe -q -T %S/Inputs/jt-pic-linkerscript.ld +# RUN: llvm-bolt %t.exe -relocs -o %t.out -data %t.fdata \ +# RUN: -lite=1 +# RUN: llvm-readelf -S %t.out | FileCheck --check-prefix=CHECK %s +# The output binary is runnable, but we check for test success with +# readelf. This is another way to check this bug: +# COM: %t.out + +# BOLT needs to create a new rodata section, indicating that it +# successfully moved the jump table in _start. 
+# CHECK: [{{.*}}] .bolt.org.rodata + + .globl _start + .type _start, %function +_start: + .cfi_startproc +# FDATA: 0 [unknown] 0 1 _start 0 0 1 + push %rbp + mov %rsp, %rbp + mov 0x8(%rbp), %rdi + cmpq $3, %rdi + ja .L5 + jmp .L6 +# Unreachable code, here to mark this function as non-simple +# (containing unknown control flow) with a stray indirect jmp + jmp *%rax +.L6: + decq %rdi + leaq .LJT1(%rip), %rcx + movslq (%rcx, %rdi, 4), %rax + addq %rcx, %rax + jmp *%rax +.L1: + leaq str1(%rip), %rsi + jmp .L4 +.L2: + leaq str2(%rip), %rsi + jmp .L4 +.L3: + leaq str3(%rip), %rsi + jmp .L4 +.L5: + leaq str4(%rip), %rsi +.L4: + movq $1, %rdi + movq $10, %rdx + movq $1, %rax + syscall + mov 0x8(%rbp), %rdi + decq %rdi + callq func_b + movq %rax, %rdi + movq $231, %rax + syscall + pop %rbp + ret + .cfi_endproc + .size _start, .-_start + + .globl func_b + .type func_b, %function +func_b: + .cfi_startproc + push %rbp + mov %rsp, %rbp + cmpq $3, %rdi + ja .L2_6 +# FT + leaq .LJT2(%rip), %rcx + movslq (%rcx, %rdi, 4), %rax + addq %rcx, %rax + jmp *%rax +.L2_1: + movq $0, %rax + jmp .L2_5 +.L2_2: + movq $1, %rax + jmp .L2_5 +.L2_3: + movq $2, %rax + jmp .L2_5 +.L2_4: + movq $3, %rax + jmp .L2_5 +.L2_6: + movq $-1, %rax +.L2_5: + popq %rbp + ret + .cfi_endproc + .size func_b, .-func_b + + .rodata +str1: .asciz "Message 1\n" +str2: .asciz "Message 2\n" +str3: .asciz "Message 3\n" +str4: .asciz "Highrange\n" +# Special case where the first .LJT2 entry is a valid offset of +# _start when interpreted with .LJT1 as a base address. 
+.LJT1: + .long .L1-.LJT1 + .long .L2-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 +.LJT2: + .long .L2_1-.LJT2 + .long .L2_2-.LJT2 + .long .L2_3-.LJT2 + .long .L2_4-.LJT2 From ba65584d1545951ce958ea5729692374055d6b9f Mon Sep 17 00:00:00 2001 From: Renaud-K Date: Wed, 2 Nov 2022 15:39:58 -0700 Subject: [PATCH 280/516] Alias Analysis infra in Flang Differential revision: https://reviews.llvm.org/D136889 --- flang/CMakeLists.txt | 3 + .../flang/Optimizer/Analysis/AliasAnalysis.h | 29 ++++ flang/lib/Lower/IntrinsicCall.cpp | 2 +- .../lib/Optimizer/Analysis/AliasAnalysis.cpp | 67 +++++++++ flang/lib/Optimizer/Analysis/CMakeLists.txt | 16 +++ flang/lib/Optimizer/CMakeLists.txt | 1 + flang/test/CMakeLists.txt | 1 + .../lib/Analysis/AliasAnalysis/CMakeLists.txt | 29 ++++ .../AliasAnalysis/TestAliasAnalysis.cpp | 72 ++++++++++ .../AliasAnalysis/alias-analysis-1.fir | 21 +++ flang/test/lib/Analysis/CMakeLists.txt | 1 + flang/test/lib/CMakeLists.txt | 1 + flang/test/lib/lit.local.cfg | 7 + flang/tools/fir-opt/CMakeLists.txt | 8 ++ flang/tools/fir-opt/fir-opt.cpp | 8 ++ mlir/test/lib/Analysis/TestAliasAnalysis.cpp | 129 ++++++++++-------- mlir/test/lib/Analysis/TestAliasAnalysis.h | 37 +++++ 17 files changed, 375 insertions(+), 57 deletions(-) create mode 100644 flang/include/flang/Optimizer/Analysis/AliasAnalysis.h create mode 100644 flang/lib/Optimizer/Analysis/AliasAnalysis.cpp create mode 100644 flang/lib/Optimizer/Analysis/CMakeLists.txt create mode 100644 flang/test/lib/Analysis/AliasAnalysis/CMakeLists.txt create mode 100644 flang/test/lib/Analysis/AliasAnalysis/TestAliasAnalysis.cpp create mode 100644 flang/test/lib/Analysis/AliasAnalysis/alias-analysis-1.fir create mode 100644 flang/test/lib/Analysis/CMakeLists.txt create mode 100644 flang/test/lib/CMakeLists.txt create mode 100644 flang/test/lib/lit.local.cfg create mode 100644 mlir/test/lib/Analysis/TestAliasAnalysis.h diff --git a/flang/CMakeLists.txt 
b/flang/CMakeLists.txt index 8f00c25bfc2fe..ca9124c05f4fe 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -411,6 +411,9 @@ endif() include(CMakeParseArguments) include(AddFlang) +if (FLANG_INCLUDE_TESTS) + add_compile_definitions(FLANG_INCLUDE_TESTS=1) +endif() add_subdirectory(include) add_subdirectory(lib) diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h new file mode 100644 index 0000000000000..a3b20b7bbfecc --- /dev/null +++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h @@ -0,0 +1,29 @@ +//===- AliasAnalysis.h - Alias Analysis in FIR -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FIR_ANALYSIS_ALIASANALYSIS_H_ +#define FIR_ANALYSIS_ALIASANALYSIS_H_ + +#include "mlir/Analysis/AliasAnalysis.h" + +namespace fir { + +//===----------------------------------------------------------------------===// +// AliasAnalysis +//===----------------------------------------------------------------------===// +class AliasAnalysis { +public: + /// Given two values, return their aliasing behavior. + mlir::AliasResult alias(mlir::Value lhs, mlir::Value rhs); + + /// Return the modify-reference behavior of `op` on `location`. + mlir::ModRefResult getModRef(mlir::Operation *op, mlir::Value location); +}; +} // namespace fir + +#endif // FIR_ANALYSIS_ALIASANALYSIS_H_ diff --git a/flang/lib/Lower/IntrinsicCall.cpp b/flang/lib/Lower/IntrinsicCall.cpp index 0184352336684..abd31558aa05d 100644 --- a/flang/lib/Lower/IntrinsicCall.cpp +++ b/flang/lib/Lower/IntrinsicCall.cpp @@ -5004,7 +5004,7 @@ Fortran::lower::getIntrinsicArgumentLowering(llvm::StringRef specificName) { /// intrinsic function. 
Fortran::lower::ArgLoweringRule Fortran::lower::lowerIntrinsicArgumentAs( const IntrinsicArgumentLoweringRules &rules, unsigned position) { - assert(position < sizeof(rules.args) / sizeof(decltype(*rules.args)) && + assert(position < sizeof(rules.args) / (sizeof(decltype(*rules.args))) && "invalid argument"); return {rules.args[position].lowerAs, rules.args[position].handleDynamicOptional}; diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp new file mode 100644 index 0000000000000..85f4743d53c67 --- /dev/null +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -0,0 +1,67 @@ +//===- AliasAnalysis.cpp - Alias Analysis for FIR ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Analysis/AliasAnalysis.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// AliasAnalysis: alias +//===----------------------------------------------------------------------===// + +namespace fir { +AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { + // This is for now a mock analysis + if (lhs == rhs) { + return AliasResult::MustAlias; + } + return AliasResult::MayAlias; +} + +//===----------------------------------------------------------------------===// +// AliasAnalysis: getModRef +//===----------------------------------------------------------------------===// + +/// This is mostly inspired by MLIR::LocalAliasAnalysis with 2 notable +/// differences 1) Regions are not handled here but will be handled by a data +/// flow analysis to come 2) Allocate and Free effects are considered modifying +ModRefResult 
AliasAnalysis::getModRef(Operation *op, Value location) { + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) + return ModRefResult::getModAndRef(); + + // Build a ModRefResult by merging the behavior of the effects of this + // operation. + SmallVector effects; + interface.getEffects(effects); + + ModRefResult result = ModRefResult::getNoModRef(); + for (const MemoryEffects::EffectInstance &effect : effects) { + + // Check for an alias between the effect and our memory location. + AliasResult aliasResult = AliasResult::MayAlias; + if (Value effectValue = effect.getValue()) + aliasResult = alias(effectValue, location); + + // If we don't alias, ignore this effect. + if (aliasResult.isNo()) + continue; + + // Merge in the corresponding mod or ref for this effect. + if (isa(effect.getEffect())) { + result = result.merge(ModRefResult::getRef()); + } else { + result = result.merge(ModRefResult::getMod()); + } + if (result.isModAndRef()) + break; + } + return result; +} +} // namespace fir diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt new file mode 100644 index 0000000000000..4ebe7d8c78c33 --- /dev/null +++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt @@ -0,0 +1,16 @@ +add_flang_library(FIRAnalysis + AliasAnalysis.cpp + + DEPENDS + FIRBuilder + FIRDialect + FIRSupport + + LINK_LIBS + FIRBuilder + FIRDialect + MLIRFuncDialect + MLIRLLVMDialect + MLIRMathTransforms + FIRSupport +) diff --git a/flang/lib/Optimizer/CMakeLists.txt b/flang/lib/Optimizer/CMakeLists.txt index 2320bf4f44270..4a602162ed2b7 100644 --- a/flang/lib/Optimizer/CMakeLists.txt +++ b/flang/lib/Optimizer/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Support) add_subdirectory(Transforms) +add_subdirectory(Analysis) diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index d8dca531d9398..7601e1e4c87a4 100644 --- a/flang/test/CMakeLists.txt +++ 
b/flang/test/CMakeLists.txt @@ -1,5 +1,6 @@ # Test runner infrastructure for Flang. This configures the Flang test trees # for use by Lit, and delegates to LLVM's lit test handlers. +add_subdirectory(lib) llvm_canonicalize_cmake_booleans( FLANG_BUILD_EXAMPLES diff --git a/flang/test/lib/Analysis/AliasAnalysis/CMakeLists.txt b/flang/test/lib/Analysis/AliasAnalysis/CMakeLists.txt new file mode 100644 index 0000000000000..c4b3838c9a23e --- /dev/null +++ b/flang/test/lib/Analysis/AliasAnalysis/CMakeLists.txt @@ -0,0 +1,29 @@ +# Exclude tests from libMLIR.so +add_flang_library(FIRTestAnalysis + TestAliasAnalysis.cpp + + DEPENDS + FIRDialect + FIRBuilder + FIRSupport + FIRTransforms + FIRAnalysis + ${dialect_libs} + + LINK_LIBS + FIRDialect + FIRBuilder + FIRSupport + FIRTransforms + FIRAnalysis + ${dialect_libs} + MLIRFuncDialect + MLIRLLVMDialect + MLIRAnalysis + MLIRTestAnalysis + ) + +target_include_directories(FIRTestAnalysis + PRIVATE + ${MLIR_MAIN_SRC_DIR}/.. + ) \ No newline at end of file diff --git a/flang/test/lib/Analysis/AliasAnalysis/TestAliasAnalysis.cpp b/flang/test/lib/Analysis/AliasAnalysis/TestAliasAnalysis.cpp new file mode 100644 index 0000000000000..39aaf8fba180a --- /dev/null +++ b/flang/test/lib/Analysis/AliasAnalysis/TestAliasAnalysis.cpp @@ -0,0 +1,72 @@ +//===- TestAliasAnalysis.cpp - Test FIR alias analysis -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/test/lib/Analysis/TestAliasAnalysis.h" +#include "mlir/Analysis/AliasAnalysis.h" +#include "mlir/Pass/Pass.h" +#include "flang/Optimizer/Analysis/AliasAnalysis.h" + +using namespace mlir; + +namespace { + +//===----------------------------------------------------------------------===// +// Testing AliasResult +//===----------------------------------------------------------------------===// + +struct TestFIRAliasAnalysisPass + : public test::TestAliasAnalysisBase, + PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestFIRAliasAnalysisPass) + + StringRef getArgument() const final { return "test-fir-alias-analysis"; } + StringRef getDescription() const final { + return "Test alias analysis results."; + } + void runOnOperation() override { + mlir::AliasAnalysis aliasAnalysis(getOperation()); + aliasAnalysis.addAnalysisImplementation(fir::AliasAnalysis()); + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); + } +}; + +//===----------------------------------------------------------------------===// +// Testing ModRefResult +//===----------------------------------------------------------------------===// + +struct TestFIRAliasAnalysisModRefPass + : public test::TestAliasAnalysisModRefBase, + PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestFIRAliasAnalysisModRefPass) + + StringRef getArgument() const final { + return "test-fir-alias-analysis-modref"; + } + StringRef getDescription() const final { + return "Test alias analysis ModRef results."; + } + void runOnOperation() override { + mlir::AliasAnalysis aliasAnalysis(getOperation()); + aliasAnalysis.addAnalysisImplementation(fir::AliasAnalysis()); + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// 
Pass Registration +//===----------------------------------------------------------------------===// + +namespace fir { +namespace test { +void registerTestFIRAliasAnalysisPass() { + PassRegistration(); + PassRegistration(); +} +} // namespace test +} // namespace fir \ No newline at end of file diff --git a/flang/test/lib/Analysis/AliasAnalysis/alias-analysis-1.fir b/flang/test/lib/Analysis/AliasAnalysis/alias-analysis-1.fir new file mode 100644 index 0000000000000..4ed492ebae2e4 --- /dev/null +++ b/flang/test/lib/Analysis/AliasAnalysis/alias-analysis-1.fir @@ -0,0 +1,21 @@ +// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' -split-input-file 2>&1 | FileCheck %s + +// CHECK-LABEL: Testing : "_QPtest" +// CHECK-DAG: alloca_1#0 <-> address_of#0: MayAlias +func.func @_QPtest(%arg1: !fir.ref) { + %c1_i32 = arith.constant 1 : i32 + %0 = fir.alloca () -> () {test.ptr = "alloca_1"} + %1 = fir.address_of(@_QPf) {test.ptr = "address_of"} : () -> i32 + %2 = fir.convert %1 : (() -> i32) -> (() -> ()) + %4 = fir.convert %0 : (!fir.ref<() -> ()>) -> !fir.llvm_ptr<() -> ()> + fir.store %2 to %4 : !fir.llvm_ptr<() -> ()> + %6 = fir.load %0 : !fir.ref<() -> ()> + fir.call @_QPs(%6) : (() -> ()) -> () + return +} + +// ----- +func.func private @_QPs(%arg0: () -> ()) + +// ----- +func.func private @_QPf() -> i32 diff --git a/flang/test/lib/Analysis/CMakeLists.txt b/flang/test/lib/Analysis/CMakeLists.txt new file mode 100644 index 0000000000000..2b313d6c615aa --- /dev/null +++ b/flang/test/lib/Analysis/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(AliasAnalysis) diff --git a/flang/test/lib/CMakeLists.txt b/flang/test/lib/CMakeLists.txt new file mode 100644 index 0000000000000..fc6ef10fab1f5 --- /dev/null +++ b/flang/test/lib/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Analysis) diff --git a/flang/test/lib/lit.local.cfg b/flang/test/lib/lit.local.cfg new file mode 100644 index 0000000000000..9832f42447387 --- /dev/null +++ 
b/flang/test/lib/lit.local.cfg @@ -0,0 +1,7 @@ + +# Excluding .cpp file from the extensions since from this level down they are used for the development +config.suffixes = ['.c', '.f', '.F', '.ff', '.FOR', '.for', '.f77', '.f90', '.F90', + '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf', + '.CUF', '.f18', '.F18', '.f03', '.F03', '.f08', '.F08', + '.ll', '.fir', '.mlir'] + diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt index adbdb23739dd0..1914c370a407e 100644 --- a/flang/tools/fir-opt/CMakeLists.txt +++ b/flang/tools/fir-opt/CMakeLists.txt @@ -2,12 +2,20 @@ add_flang_tool(fir-opt fir-opt.cpp) llvm_update_compile_flags(fir-opt) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +if(FLANG_INCLUDE_TESTS) + set(test_libs + FIRTestAnalysis + ) +endif() + target_link_libraries(fir-opt PRIVATE FIRDialect FIRSupport FIRTransforms FIRCodeGen HLFIRDialect + FIRAnalysis + ${test_libs} ${dialect_libs} # TODO: these should be transitive dependencies from a target providing diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp index dc3a0a4b84764..a35960be75e6b 100644 --- a/flang/tools/fir-opt/fir-opt.cpp +++ b/flang/tools/fir-opt/fir-opt.cpp @@ -17,11 +17,19 @@ #include "flang/Optimizer/Transforms/Passes.h" using namespace mlir; +namespace fir { +namespace test { +void registerTestFIRAliasAnalysisPass(); +} // namespace test +} // namespace fir int main(int argc, char **argv) { fir::support::registerMLIRPassesForFortranTools(); fir::registerOptCodeGenPasses(); fir::registerOptTransformPasses(); +#ifdef FLANG_INCLUDE_TESTS + fir::test::registerTestFIRAliasAnalysisPass(); +#endif DialectRegistry registry; fir::support::registerDialects(registry); return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n", diff --git a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp index 284ea4cffeca4..04b2bc3906a8a 100644 --- 
a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp +++ b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "TestAliasAnalysis.h" #include "mlir/Analysis/AliasAnalysis.h" #include "mlir/Pass/Pass.h" @@ -39,13 +40,80 @@ static void printAliasOperand(Value value) { llvm::errs() << "#" << result.getResultNumber(); } +namespace mlir { +namespace test { +void printAliasResult(AliasResult result, Value lhs, Value rhs) { + printAliasOperand(lhs); + llvm::errs() << " <-> "; + printAliasOperand(rhs); + llvm::errs() << ": " << result << "\n"; +} + +/// Print the result of an alias query. +void printModRefResult(ModRefResult result, Operation *op, Value location) { + printAliasOperand(op); + llvm::errs() << " -> "; + printAliasOperand(location); + llvm::errs() << ": " << result << "\n"; +} + +void TestAliasAnalysisBase::runAliasAnalysisOnOperation( + Operation *op, AliasAnalysis &aliasAnalysis) { + llvm::errs() << "Testing : " << op->getAttr("sym_name") << "\n"; + + // Collect all of the values to check for aliasing behavior. + SmallVector valsToCheck; + op->walk([&](Operation *op) { + if (!op->getAttr("test.ptr")) + return; + valsToCheck.append(op->result_begin(), op->result_end()); + for (Region &region : op->getRegions()) + for (Block &block : region) + valsToCheck.append(block.args_begin(), block.args_end()); + }); + + // Check for aliasing behavior between each of the values. + for (auto it = valsToCheck.begin(), e = valsToCheck.end(); it != e; ++it) + for (auto *innerIt = valsToCheck.begin(); innerIt != it; ++innerIt) + printAliasResult(aliasAnalysis.alias(*innerIt, *it), *innerIt, *it); +} + +void TestAliasAnalysisModRefBase::runAliasAnalysisOnOperation( + Operation *op, AliasAnalysis &aliasAnalysis) { + llvm::errs() << "Testing : " << op->getAttr("sym_name") << "\n"; + + // Collect all of the values to check for aliasing behavior. 
+ SmallVector valsToCheck; + op->walk([&](Operation *op) { + if (!op->getAttr("test.ptr")) + return; + valsToCheck.append(op->result_begin(), op->result_end()); + for (Region &region : op->getRegions()) + for (Block &block : region) + valsToCheck.append(block.args_begin(), block.args_end()); + }); + + // Check for aliasing behavior between each of the values. + for (auto &it : valsToCheck) { + op->walk([&](Operation *op) { + if (!op->getAttr("test.ptr")) + return; + printModRefResult(aliasAnalysis.getModRef(op, it), op, it); + }); + } +} + +} // namespace test +} // namespace mlir + //===----------------------------------------------------------------------===// // Testing AliasResult //===----------------------------------------------------------------------===// namespace { struct TestAliasAnalysisPass - : public PassWrapper> { + : public test::TestAliasAnalysisBase, + PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAliasAnalysisPass) StringRef getArgument() const final { return "test-alias-analysis"; } @@ -53,32 +121,8 @@ struct TestAliasAnalysisPass return "Test alias analysis results."; } void runOnOperation() override { - llvm::errs() << "Testing : " << getOperation()->getAttr("sym_name") << "\n"; - - // Collect all of the values to check for aliasing behavior. AliasAnalysis &aliasAnalysis = getAnalysis(); - SmallVector valsToCheck; - getOperation()->walk([&](Operation *op) { - if (!op->getAttr("test.ptr")) - return; - valsToCheck.append(op->result_begin(), op->result_end()); - for (Region &region : op->getRegions()) - for (Block &block : region) - valsToCheck.append(block.args_begin(), block.args_end()); - }); - - // Check for aliasing behavior between each of the values. - for (auto it = valsToCheck.begin(), e = valsToCheck.end(); it != e; ++it) - for (auto *innerIt = valsToCheck.begin(); innerIt != it; ++innerIt) - printAliasResult(aliasAnalysis.alias(*innerIt, *it), *innerIt, *it); - } - - /// Print the result of an alias query. 
- void printAliasResult(AliasResult result, Value lhs, Value rhs) { - printAliasOperand(lhs); - llvm::errs() << " <-> "; - printAliasOperand(rhs); - llvm::errs() << ": " << result << "\n"; + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); } }; } // namespace @@ -89,7 +133,8 @@ struct TestAliasAnalysisPass namespace { struct TestAliasAnalysisModRefPass - : public PassWrapper> { + : public test::TestAliasAnalysisModRefBase, + PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAliasAnalysisModRefPass) StringRef getArgument() const final { return "test-alias-analysis-modref"; } @@ -97,36 +142,8 @@ struct TestAliasAnalysisModRefPass return "Test alias analysis ModRef results."; } void runOnOperation() override { - llvm::errs() << "Testing : " << getOperation()->getAttr("sym_name") << "\n"; - - // Collect all of the values to check for aliasing behavior. AliasAnalysis &aliasAnalysis = getAnalysis(); - SmallVector valsToCheck; - getOperation()->walk([&](Operation *op) { - if (!op->getAttr("test.ptr")) - return; - valsToCheck.append(op->result_begin(), op->result_end()); - for (Region &region : op->getRegions()) - for (Block &block : region) - valsToCheck.append(block.args_begin(), block.args_end()); - }); - - // Check for aliasing behavior between each of the values. - for (auto &it : valsToCheck) { - getOperation()->walk([&](Operation *op) { - if (!op->getAttr("test.ptr")) - return; - printModRefResult(aliasAnalysis.getModRef(op, it), op, it); - }); - } - } - - /// Print the result of an alias query. 
- void printModRefResult(ModRefResult result, Operation *op, Value location) { - printAliasOperand(op); - llvm::errs() << " -> "; - printAliasOperand(location); - llvm::errs() << ": " << result << "\n"; + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); } }; } // namespace diff --git a/mlir/test/lib/Analysis/TestAliasAnalysis.h b/mlir/test/lib/Analysis/TestAliasAnalysis.h new file mode 100644 index 0000000000000..f84b2fabb092a --- /dev/null +++ b/mlir/test/lib/Analysis/TestAliasAnalysis.h @@ -0,0 +1,37 @@ +//===- TestAliasAnalysis.h - MLIR Test Utility ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a common facility that can be reused for the +// testing of various aliasing analyses +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TEST_LIB_ANALYSIS_ALIASANALYSIS_H +#define MLIR_TEST_LIB_ANALYSIS_ALIASANALYSIS_H + +#include "mlir/Analysis/AliasAnalysis.h" + +namespace mlir { +namespace test { + +/// Print the result of an alias query. 
+void printAliasResult(AliasResult result, Value lhs, Value rhs); +void printModRefResult(ModRefResult result, Operation *op, Value location); + +struct TestAliasAnalysisBase { + void runAliasAnalysisOnOperation(Operation *op, AliasAnalysis &aliasAnalysis); +}; + +struct TestAliasAnalysisModRefBase { + void runAliasAnalysisOnOperation(Operation *op, AliasAnalysis &aliasAnalysis); +}; + +} // namespace test +} // namespace mlir + +#endif // MLIR_TEST_LIB_ANALYSIS_ALIASANALYSIS_H From 52ffc728181bc2d3c889f7f80c252c3433b9e7b6 Mon Sep 17 00:00:00 2001 From: Hanhan Wang Date: Fri, 4 Nov 2022 13:58:59 -0700 Subject: [PATCH 281/516] [mlir][tiling] Relax tiling to accept generating multiple operations. Some operations need to generate multiple operations when implementing the tiling interface. Here is a sound example in IREE, see https://github.com/iree-org/iree/pull/10905 for more details. Reviewed By: mravishankar Differential Revision: https://reviews.llvm.org/D137300 --- .../Dialect/SCF/Transforms/TileUsingInterface.h | 6 ++++-- .../Linalg/TransformOps/LinalgTransformOps.cpp | 6 +++--- .../Dialect/SCF/Transforms/TileUsingInterface.cpp | 15 ++++++--------- .../TilingInterface/TestTilingInterface.cpp | 3 ++- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h index 9fa4114c77b11..151993cc3d9a4 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -62,8 +62,10 @@ struct SCFTilingOptions { /// Transformation information returned after tiling. struct SCFTilingResult { - /// The tiled operation generated. - Operation *tiledOp; + /// Tiled operations that are generated during tiling. The order does not + /// matter except the last op. The replacements are expected to be the results + /// of the last op. 
+ SmallVector tiledOps; /// The `scf.for` operations that iterate over the tiles. SmallVector loops; /// Values to use as replacements for the untiled op. Is the same size as the diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index a35dd14483963..6b8ca9125c82d 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -931,7 +931,7 @@ transform::ScalarizeOp::applyToOne(linalg::LinalgOp target, if (failed(maybeTilingResult)) return DiagnosedSilenceableFailure(reportUnknownTransformError(target)); - results.push_back(maybeTilingResult->tiledOp); + results.append(maybeTilingResult->tiledOps); return DiagnosedSilenceableFailure(success()); } @@ -1251,7 +1251,7 @@ transform::TileOp::apply(TransformResults &transformResults, rewriter.replaceOp(linalgOp, maybeTilingResult->loops.front()->getResults()); - tiled.push_back(maybeTilingResult->tiledOp); + tiled.append(maybeTilingResult->tiledOps); for (const auto &en2 : llvm::enumerate(maybeTilingResult->loops)) loops[en2.index()].push_back(en2.value()); } @@ -1609,7 +1609,7 @@ transform::TileToScfForOp::apply(TransformResults &transformResults, rewriter.replaceOp(tilingInterfaceOp, tilingResult->replacements); - tiled.push_back(tilingResult->tiledOp); + tiled.append(tilingResult->tiledOps); for (const auto &en2 : llvm::enumerate(tilingResult->loops)) loops[en2.index()].push_back(en2.value()); } diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index 0c86bd4d1262a..6e59bdb09b12d 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -360,11 +360,7 @@ mlir::scf::tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, tilingResult.loops.back().getBody()->getTerminator()); SmallVector tiledImplementation = 
op.getTiledImplementation(rewriter, offsets, sizes); - if (tiledImplementation.size() != 1) { - return rewriter.notifyMatchFailure( - op, "expected tiled implementation to return a single op"); - } - tilingResult.tiledOp = tiledImplementation[0]; + tilingResult.tiledOps.append(tiledImplementation); if (op->getNumResults() == 0) { // nothing more to do. return tilingResult; @@ -396,13 +392,13 @@ mlir::scf::tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, } FailureOr> replacementOr = yieldTiledValues( - rewriter, destinationTensors, tilingResult.tiledOp->getResults(), + rewriter, destinationTensors, tilingResult.tiledOps.back()->getResults(), resultOffsetsList, resultSizesList, tilingResult.loops); if (failed(replacementOr)) return rewriter.notifyMatchFailure(op, "failed to yield replacement"); if (auto dstOp = - dyn_cast(tilingResult.tiledOp)) { + dyn_cast(tilingResult.tiledOps.back())) { auto innerMostLoop = tilingResult.loops.back(); SmallVector destinationTensors = dstOp.getDpsInitOperands(); assert(destinationTensors.size() == @@ -554,13 +550,14 @@ mlir::scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp( tileUsingSCFForOp(rewriter, consumer, options.tilingOptions); if (failed(tilingResult)) return rewriter.notifyMatchFailure(consumer, "failed to tile consumer"); - tileAndFuseResult.tiledAndFusedOps.insert(tilingResult->tiledOp); + for (auto tiledOp : tilingResult->tiledOps) + tileAndFuseResult.tiledAndFusedOps.insert(tiledOp); tileAndFuseResult.loops = std::move(tilingResult->loops); for (const auto &result : llvm::enumerate( llvm::zip(consumer->getResults(), tilingResult->replacements))) { tileAndFuseResult.replacements[std::get<0>(result.value())] = std::get<1>(result.value()); - yieldedValueToResultNumber[tilingResult->tiledOp->getResult( + yieldedValueToResultNumber[tilingResult->tiledOps.back()->getResult( result.index())] = result.index(); } } diff --git a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp 
b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp index 31e3c1a529a7c..1644179c427c3 100644 --- a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp +++ b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp @@ -193,7 +193,8 @@ struct TestTileUsingSCFForOp rewriter.eraseOp(op); } - filter.replaceLinalgTransformationFilter(rewriter, tilingResult->tiledOp); + for (auto tiledOp : tilingResult->tiledOps) + filter.replaceLinalgTransformationFilter(rewriter, tiledOp); return success(); } From 35a870c30aaa9b16ccb45a9222471433b43ae66c Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 4 Nov 2022 22:09:58 +0100 Subject: [PATCH 282/516] Revert "Implement CWG2631" This reverts commit 7acfe3629479c8489fc2d7f629994dc200be990c. This reverts commit 5f87a892a7bed9cb0599573b9aaf387bc1df9c14. This reverts commit 6875ac69279a3a02fab382a2c8d121558ecbfa91. --- clang/docs/ReleaseNotes.rst | 5 - clang/include/clang/AST/ExprCXX.h | 108 ++------ clang/include/clang/AST/Stmt.h | 7 - .../clang/Basic/DiagnosticSemaKinds.td | 4 - clang/include/clang/Sema/Sema.h | 87 +----- clang/lib/AST/ASTImporter.cpp | 21 +- clang/lib/AST/Decl.cpp | 3 +- clang/lib/AST/ExprCXX.cpp | 68 +---- clang/lib/Parse/ParseCXXInlineMethods.cpp | 5 - clang/lib/Parse/ParseDeclCXX.cpp | 6 +- clang/lib/Sema/SemaDeclCXX.cpp | 90 ++++-- clang/lib/Sema/SemaExpr.cpp | 257 ++---------------- clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 +- clang/lib/Sema/TreeTransform.h | 21 +- clang/lib/Serialization/ASTReaderStmt.cpp | 12 +- clang/lib/Serialization/ASTWriterStmt.cpp | 6 - clang/test/CXX/class/class.local/p1-0x.cpp | 4 +- .../CodeGenCXX/builtin-source-location.cpp | 2 - .../default-arguments-with-immediate.cpp | 54 ---- .../default-argument-with-immediate-calls.cpp | 34 --- .../cxx2a-consteval-default-params.cpp | 68 ----- clang/test/SemaCXX/source_location.cpp | 64 +---- 22 files changed, 146 insertions(+), 788 deletions(-) delete mode 100644 
clang/test/CodeGenCXX/default-arguments-with-immediate.cpp delete mode 100644 clang/test/PCH/default-argument-with-immediate-calls.cpp delete mode 100644 clang/test/SemaCXX/cxx2a-consteval-default-params.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e16191e06e337..7bb1405c131ab 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -578,11 +578,6 @@ C++ Language Changes in Clang conforming GNU extensions. Projects incompatible with C++17 can add ``-std=gnu++14`` to their build settings to restore the previous behaviour. - Implemented DR2358 allowing init captures in lambdas in default arguments. -- Implemented DR2631. Invalid ``consteval`` calls in default arguments and default - member initializers are diagnosed when and if the default is used. - This Fixes `Issue 56379 `_ - and changes the value of ``std::source_location::current()`` - used in default parameters calls compared to previous versions of Clang. C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 098720d9469f0..0b927c0294752 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -1245,12 +1245,8 @@ class CXXThrowExpr : public Expr { /// This wraps up a function call argument that was created from the /// corresponding parameter's default argument, when the call did not /// explicitly supply arguments for all of the parameters. -class CXXDefaultArgExpr final - : public Expr, - private llvm::TrailingObjects { +class CXXDefaultArgExpr final : public Expr { friend class ASTStmtReader; - friend class ASTReader; - friend TrailingObjects; /// The parameter whose default is being used. 
ParmVarDecl *Param; @@ -1259,7 +1255,7 @@ class CXXDefaultArgExpr final DeclContext *UsedContext; CXXDefaultArgExpr(StmtClass SC, SourceLocation Loc, ParmVarDecl *Param, - Expr *RewrittenExpr, DeclContext *UsedContext) + DeclContext *UsedContext) : Expr(SC, Param->hasUnparsedDefaultArg() ? Param->getType().getNonReferenceType() @@ -1268,58 +1264,28 @@ class CXXDefaultArgExpr final Param->getDefaultArg()->getObjectKind()), Param(Param), UsedContext(UsedContext) { CXXDefaultArgExprBits.Loc = Loc; - CXXDefaultArgExprBits.HasRewrittenInit = RewrittenExpr != nullptr; - if (RewrittenExpr) - *getTrailingObjects() = RewrittenExpr; setDependence(computeDependence(this)); } - CXXDefaultArgExpr(EmptyShell Empty, bool HasRewrittenInit) - : Expr(CXXDefaultArgExprClass, Empty) { - CXXDefaultArgExprBits.HasRewrittenInit = HasRewrittenInit; - } - - size_t numTrailingObjects() const { - return CXXDefaultArgExprBits.HasRewrittenInit; - } - public: - static CXXDefaultArgExpr *CreateEmpty(const ASTContext &C, - bool HasRewrittenInit); + CXXDefaultArgExpr(EmptyShell Empty) : Expr(CXXDefaultArgExprClass, Empty) {} // \p Param is the parameter whose default argument is used by this // expression. static CXXDefaultArgExpr *Create(const ASTContext &C, SourceLocation Loc, - ParmVarDecl *Param, Expr *RewrittenExpr, - DeclContext *UsedContext); + ParmVarDecl *Param, + DeclContext *UsedContext) { + return new (C) + CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, UsedContext); + } + // Retrieve the parameter that the argument was created from. const ParmVarDecl *getParam() const { return Param; } ParmVarDecl *getParam() { return Param; } - bool hasRewrittenInit() const { - return CXXDefaultArgExprBits.HasRewrittenInit; - } - - // Retrieve the argument to the function call. - Expr *getExpr(); - const Expr *getExpr() const { - return const_cast(this)->getExpr(); - } - - Expr *getRewrittenExpr() { - return hasRewrittenInit() ? 
*getTrailingObjects() : nullptr; - } - - const Expr *getRewrittenExpr() const { - return const_cast(this)->getRewrittenExpr(); - } - - // Retrieve the rewritten init expression (for an init expression containing - // immediate calls) with the top level FullExpr and ConstantExpr stripped off. - Expr *getAdjustedRewrittenExpr(); - const Expr *getAdjustedRewrittenExpr() const { - return const_cast(this)->getAdjustedRewrittenExpr(); - } + // Retrieve the actual argument to the function call. + const Expr *getExpr() const { return getParam()->getDefaultArg(); } + Expr *getExpr() { return getParam()->getDefaultArg(); } const DeclContext *getUsedContext() const { return UsedContext; } DeclContext *getUsedContext() { return UsedContext; } @@ -1356,13 +1322,10 @@ class CXXDefaultArgExpr final /// is implicitly used in a mem-initializer-list in a constructor /// (C++11 [class.base.init]p8) or in aggregate initialization /// (C++1y [dcl.init.aggr]p7). -class CXXDefaultInitExpr final - : public Expr, - private llvm::TrailingObjects { - - friend class ASTStmtReader; +class CXXDefaultInitExpr : public Expr { friend class ASTReader; - friend TrailingObjects; + friend class ASTStmtReader; + /// The field whose default is being used. 
FieldDecl *Field; @@ -1370,29 +1333,16 @@ class CXXDefaultInitExpr final DeclContext *UsedContext; CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, QualType Ty, DeclContext *UsedContext, - Expr *RewrittenInitExpr); - - CXXDefaultInitExpr(EmptyShell Empty, bool HasRewrittenInit) - : Expr(CXXDefaultInitExprClass, Empty) { - CXXDefaultInitExprBits.HasRewrittenInit = HasRewrittenInit; - } + FieldDecl *Field, QualType Ty, DeclContext *UsedContext); - size_t numTrailingObjects() const { - return CXXDefaultInitExprBits.HasRewrittenInit; - } + CXXDefaultInitExpr(EmptyShell Empty) : Expr(CXXDefaultInitExprClass, Empty) {} public: - static CXXDefaultInitExpr *CreateEmpty(const ASTContext &C, - bool HasRewrittenInit); /// \p Field is the non-static data member whose default initializer is used /// by this expression. static CXXDefaultInitExpr *Create(const ASTContext &Ctx, SourceLocation Loc, - FieldDecl *Field, DeclContext *UsedContext, - Expr *RewrittenInitExpr); - - bool hasRewrittenInit() const { - return CXXDefaultInitExprBits.HasRewrittenInit; + FieldDecl *Field, DeclContext *UsedContext) { + return new (Ctx) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), UsedContext); } /// Get the field whose initializer will be used. @@ -1400,23 +1350,13 @@ class CXXDefaultInitExpr final const FieldDecl *getField() const { return Field; } /// Get the initialization expression that will be used. - Expr *getExpr(); const Expr *getExpr() const { - return const_cast(this)->getExpr(); - } - - /// Retrieve the initializing expression with evaluated immediate calls, if - /// any. - const Expr *getRewrittenExpr() const { - assert(hasRewrittenInit() && "expected a rewritten init expression"); - return *getTrailingObjects(); + assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); + return Field->getInClassInitializer(); } - - /// Retrieve the initializing expression with evaluated immediate calls, if - /// any. 
- Expr *getRewrittenExpr() { - assert(hasRewrittenInit() && "expected a rewritten init expression"); - return *getTrailingObjects(); + Expr *getExpr() { + assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); + return Field->getInClassInitializer(); } const DeclContext *getUsedContext() const { return UsedContext; } diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index a894111be896a..49a66a1ea5b86 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -686,9 +686,6 @@ class alignas(void *) Stmt { unsigned : NumExprBits; - /// Whether this CXXDefaultArgExpr rewrote its argument and stores a copy. - unsigned HasRewrittenInit : 1; - /// The location where the default argument expression was used. SourceLocation Loc; }; @@ -699,10 +696,6 @@ class alignas(void *) Stmt { unsigned : NumExprBits; - /// Whether this CXXDefaultInitExprBitfields rewrote its argument and stores - /// a copy. - unsigned HasRewrittenInit : 1; - /// The location where the default initializer expression was used. 
SourceLocation Loc; }; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a720da687550a..1b1db765fa7a9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2644,10 +2644,6 @@ def err_invalid_consteval_take_address : Error< " of an immediate invocation">; def err_invalid_consteval_call : Error< "call to consteval function %q0 is not a constant expression">; -def note_invalid_consteval_initializer : Note< - "in the default initalizer of %0">; -def note_invalid_consteval_initializer_here : Note< - "initialized here %0">; def err_invalid_consteval_decl_kind : Error< "%0 cannot be declared consteval">; def err_invalid_constexpr : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f5151ac7f4c3e..e8c9cb966bae7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1330,25 +1330,6 @@ class Sema final { bool InDiscardedStatement; bool InImmediateFunctionContext; - bool IsCurrentlyCheckingDefaultArgumentOrInitializer = false; - - // When evaluating immediate functions in the initializer of a default - // argument or default member initializer, this is the declaration whose - // default initializer is being evaluated and the location of the call - // or constructor definition. 
- struct InitializationContext { - InitializationContext(SourceLocation Loc, ValueDecl *Decl, - DeclContext *Context) - : Loc(Loc), Decl(Decl), Context(Context) { - assert(Decl && Context && "invalid initialization context"); - }; - - SourceLocation Loc; - ValueDecl *Decl = nullptr; - DeclContext *Context = nullptr; - }; - llvm::Optional DelayedDefaultInitializationContext; - ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context, unsigned NumCleanupObjects, CleanupInfo ParentCleanup, @@ -6215,22 +6196,19 @@ class Sema final { bool IsStdInitListInitialization, bool RequiresZeroInit, unsigned ConstructKind, SourceRange ParenRange); - ExprResult ConvertMemberDefaultInitExpression(FieldDecl *FD, Expr *InitExpr, - SourceLocation InitLoc); - ExprResult BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field); /// Instantiate or parse a C++ default argument expression as necessary. /// Return true on error. bool CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param, Expr *Init = nullptr, - bool SkipImmediateInvocations = true); + ParmVarDecl *Param); /// BuildCXXDefaultArgExpr - Creates a CXXDefaultArgExpr, instantiating /// the default expr if needed. - ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param, Expr *Init = nullptr); + ExprResult BuildCXXDefaultArgExpr(SourceLocation CallLoc, + FunctionDecl *FD, + ParmVarDecl *Param); /// FinalizeVarWithDestructor - Prepare for calling destructor on the /// constructed variable. 
@@ -9634,63 +9612,6 @@ class Sema final { return ExprEvalContexts.back().isImmediateFunctionContext(); } - bool isCheckingDefaultArgumentOrInitializer() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - const ExpressionEvaluationContextRecord &Ctx = ExprEvalContexts.back(); - return (Ctx.Context == - ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || - Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer; - } - - bool isCheckingDefaultArgumentOrInitializerOfOuterEntity() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { - if ((Ctx.Context == - ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed) || - Ctx.IsCurrentlyCheckingDefaultArgumentOrInitializer) - return true; - if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || - Ctx.isUnevaluated()) - return false; - } - return false; - } - - llvm::Optional - InnermostDeclarationWithDelayedImmediateInvocations() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - for (const auto &Ctx : llvm::reverse(ExprEvalContexts)) { - if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && - Ctx.DelayedDefaultInitializationContext) - return Ctx.DelayedDefaultInitializationContext; - if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || - Ctx.isUnevaluated()) - break; - } - return llvm::None; - } - - llvm::Optional - OutermostDeclarationWithDelayedImmediateInvocations() const { - assert(!ExprEvalContexts.empty() && - "Must be in an expression evaluation context"); - llvm::Optional - Res; - for (auto &Ctx : llvm::reverse(ExprEvalContexts)) { - if (Ctx.Context == ExpressionEvaluationContext::PotentiallyEvaluated && - !Ctx.DelayedDefaultInitializationContext && Res) - break; - if (Ctx.isConstantEvaluated() || Ctx.isImmediateFunctionContext() || - Ctx.isUnevaluated()) - break; - Res = 
Ctx.DelayedDefaultInitializationContext; - } - return Res; - } - /// RAII class used to determine whether SFINAE has /// trapped any errors that occur during template argument /// deduction. diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 188c0f593a9ef..631dfaebabbd6 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -7687,16 +7687,9 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (Error Err = ImportDefaultArgOfParmVarDecl(*FromParam, ToParam)) return std::move(Err); } - Expr *RewrittenInit = nullptr; - if (E->hasRewrittenInit()) { - ExpectedExpr ExprOrErr = import(E->getExpr()); - if (!ExprOrErr) - return ExprOrErr.takeError(); - RewrittenInit = ExprOrErr.get(); - } + return CXXDefaultArgExpr::Create(Importer.getToContext(), *ToUsedLocOrErr, - *ToParamOrErr, RewrittenInit, - *UsedContextOrErr); + *ToParamOrErr, *UsedContextOrErr); } ExpectedStmt @@ -8388,16 +8381,8 @@ ExpectedStmt ASTNodeImporter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { ToField->setInClassInitializer(*ToInClassInitializerOrErr); } - Expr *RewrittenInit = nullptr; - if (E->hasRewrittenInit()) { - ExpectedExpr ExprOrErr = import(E->getExpr()); - if (!ExprOrErr) - return ExprOrErr.takeError(); - RewrittenInit = ExprOrErr.get(); - } - return CXXDefaultInitExpr::Create(Importer.getToContext(), *ToBeginLocOrErr, - ToField, *UsedContextOrErr, RewrittenInit); + ToField, *UsedContextOrErr); } ExpectedStmt ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 659d4f74d7042..04808643ab84a 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2889,7 +2889,8 @@ Expr *ParmVarDecl::getDefaultArg() { Expr *Arg = getInit(); if (auto *E = dyn_cast_or_null(Arg)) - return E->getSubExpr(); + if (!isa(E)) + return E->getSubExpr(); return Arg; } diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index 
6a6f692dec787..3bf3eab72846c 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -949,43 +949,9 @@ const IdentifierInfo *UserDefinedLiteral::getUDSuffix() const { return cast(getCalleeDecl())->getLiteralIdentifier(); } -CXXDefaultArgExpr *CXXDefaultArgExpr::CreateEmpty(const ASTContext &C, - bool HasRewrittenInit) { - size_t Size = totalSizeToAlloc(HasRewrittenInit); - auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); - return new (Mem) CXXDefaultArgExpr(EmptyShell(), HasRewrittenInit); -} - -CXXDefaultArgExpr *CXXDefaultArgExpr::Create(const ASTContext &C, - SourceLocation Loc, - ParmVarDecl *Param, - Expr *RewrittenExpr, - DeclContext *UsedContext) { - size_t Size = totalSizeToAlloc(RewrittenExpr != nullptr); - auto *Mem = C.Allocate(Size, alignof(CXXDefaultArgExpr)); - return new (Mem) CXXDefaultArgExpr(CXXDefaultArgExprClass, Loc, Param, - RewrittenExpr, UsedContext); -} - -Expr *CXXDefaultArgExpr::getExpr() { - return CXXDefaultArgExprBits.HasRewrittenInit ? getAdjustedRewrittenExpr() - : getParam()->getDefaultArg(); -} - -Expr *CXXDefaultArgExpr::getAdjustedRewrittenExpr() { - assert(hasRewrittenInit() && - "expected this CXXDefaultArgExpr to have a rewritten init."); - Expr *Init = getRewrittenExpr(); - if (auto *E = dyn_cast_if_present(Init)) - if (!isa(E)) - return E->getSubExpr(); - return Init; -} - CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, SourceLocation Loc, FieldDecl *Field, - QualType Ty, DeclContext *UsedContext, - Expr *RewrittenInitExpr) + QualType Ty, DeclContext *UsedContext) : Expr(CXXDefaultInitExprClass, Ty.getNonLValueExprType(Ctx), Ty->isLValueReferenceType() ? VK_LValue : Ty->isRValueReferenceType() ? 
VK_XValue @@ -993,43 +959,11 @@ CXXDefaultInitExpr::CXXDefaultInitExpr(const ASTContext &Ctx, /*FIXME*/ OK_Ordinary), Field(Field), UsedContext(UsedContext) { CXXDefaultInitExprBits.Loc = Loc; - CXXDefaultInitExprBits.HasRewrittenInit = RewrittenInitExpr != nullptr; - - if (CXXDefaultInitExprBits.HasRewrittenInit) - *getTrailingObjects() = RewrittenInitExpr; - assert(Field->hasInClassInitializer()); setDependence(computeDependence(this)); } -CXXDefaultInitExpr *CXXDefaultInitExpr::CreateEmpty(const ASTContext &C, - bool HasRewrittenInit) { - size_t Size = totalSizeToAlloc(HasRewrittenInit); - auto *Mem = C.Allocate(Size, alignof(CXXDefaultInitExpr)); - return new (Mem) CXXDefaultInitExpr(EmptyShell(), HasRewrittenInit); -} - -CXXDefaultInitExpr *CXXDefaultInitExpr::Create(const ASTContext &Ctx, - SourceLocation Loc, - FieldDecl *Field, - DeclContext *UsedContext, - Expr *RewrittenInitExpr) { - - size_t Size = totalSizeToAlloc(RewrittenInitExpr != nullptr); - auto *Mem = Ctx.Allocate(Size, alignof(CXXDefaultArgExpr)); - return new (Mem) CXXDefaultInitExpr(Ctx, Loc, Field, Field->getType(), - UsedContext, RewrittenInitExpr); -} - -Expr *CXXDefaultInitExpr::getExpr() { - assert(Field->getInClassInitializer() && "initializer hasn't been parsed"); - if (hasRewrittenInit()) - return getRewrittenExpr(); - - return Field->getInClassInitializer(); -} - CXXTemporary *CXXTemporary::Create(const ASTContext &C, const CXXDestructorDecl *Destructor) { return new (C) CXXTemporary(Destructor); diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index 3a7f5426d4a70..d918ea26b9d9d 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -648,11 +648,6 @@ void Parser::ParseLexedMemberInitializer(LateParsedMemberInitializer &MI) { Actions.ActOnStartCXXInClassMemberInitializer(); - // The initializer isn't actually potentially evaluated unless it is - // used. 
- EnterExpressionEvaluationContext Eval( - Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed); - ExprResult Init = ParseCXXMemberInitializer(MI.Field, /*IsFunction=*/false, EqualLoc); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index a2f07ea5d59fc..bbffff5394f04 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -3184,11 +3184,7 @@ ExprResult Parser::ParseCXXMemberInitializer(Decl *D, bool IsFunction, "Data member initializer not starting with '=' or '{'"); EnterExpressionEvaluationContext Context( - Actions, - isa_and_present(D) - ? Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed - : Sema::ExpressionEvaluationContext::PotentiallyEvaluated, - D); + Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, D); if (TryConsumeToken(tok::equal, EqualLoc)) { if (Tok.is(tok::kw_delete)) { // In principle, an initializer of '= delete p;' is legal, but it will diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index c78ce37f372a0..ea7997b347959 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -4039,21 +4039,6 @@ ExprResult Sema::ActOnRequiresClause(ExprResult ConstraintExpr) { return ConstraintExpr; } -ExprResult Sema::ConvertMemberDefaultInitExpression(FieldDecl *FD, - Expr *InitExpr, - SourceLocation InitLoc) { - InitializedEntity Entity = - InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); - InitializationKind Kind = - FD->getInClassInitStyle() == ICIS_ListInit - ? 
InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), - InitExpr->getBeginLoc(), - InitExpr->getEndLoc()) - : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); - InitializationSequence Seq(*this, Entity, Kind, InitExpr); - return Seq.Perform(*this, Entity, Kind, InitExpr); -} - /// This is invoked after parsing an in-class initializer for a /// non-static C++ class member, and after instantiating an in-class initializer /// in a class template. Such actions are deferred until the class is complete. @@ -4082,7 +4067,16 @@ void Sema::ActOnFinishCXXInClassMemberInitializer(Decl *D, ExprResult Init = InitExpr; if (!FD->getType()->isDependentType() && !InitExpr->isTypeDependent()) { - Init = ConvertMemberDefaultInitExpression(FD, InitExpr, InitLoc); + InitializedEntity Entity = + InitializedEntity::InitializeMemberFromDefaultMemberInitializer(FD); + InitializationKind Kind = + FD->getInClassInitStyle() == ICIS_ListInit + ? InitializationKind::CreateDirectList(InitExpr->getBeginLoc(), + InitExpr->getBeginLoc(), + InitExpr->getEndLoc()) + : InitializationKind::CreateCopy(InitExpr->getBeginLoc(), InitLoc); + InitializationSequence Seq(*this, Entity, Kind, InitExpr); + Init = Seq.Perform(*this, Entity, Kind, InitExpr); if (Init.isInvalid()) { FD->setInvalidDecl(); return; @@ -15623,6 +15617,70 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType, Constructor); } +ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { + assert(Field->hasInClassInitializer()); + + // If we already have the in-class initializer nothing needs to be done. + if (Field->getInClassInitializer()) + return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); + + // If we might have already tried and failed to instantiate, don't try again. + if (Field->isInvalidDecl()) + return ExprError(); + + // Maybe we haven't instantiated the in-class initializer. Go check the + // pattern FieldDecl to see if it has one. 
+ CXXRecordDecl *ParentRD = cast(Field->getParent()); + + if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { + CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); + DeclContext::lookup_result Lookup = + ClassPattern->lookup(Field->getDeclName()); + + FieldDecl *Pattern = nullptr; + for (auto *L : Lookup) { + if (isa(L)) { + Pattern = cast(L); + break; + } + } + assert(Pattern && "We must have set the Pattern!"); + + if (!Pattern->hasInClassInitializer() || + InstantiateInClassInitializer(Loc, Field, Pattern, + getTemplateInstantiationArgs(Field))) { + // Don't diagnose this again. + Field->setInvalidDecl(); + return ExprError(); + } + return CXXDefaultInitExpr::Create(Context, Loc, Field, CurContext); + } + + // DR1351: + // If the brace-or-equal-initializer of a non-static data member + // invokes a defaulted default constructor of its class or of an + // enclosing class in a potentially evaluated subexpression, the + // program is ill-formed. + // + // This resolution is unworkable: the exception specification of the + // default constructor can be needed in an unevaluated context, in + // particular, in the operand of a noexcept-expression, and we can be + // unable to compute an exception specification for an enclosed class. + // + // Any attempt to resolve the exception specification of a defaulted default + // constructor before the initializer is lexically complete will ultimately + // come here at which point we can diagnose it. + RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); + Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) + << OutermostClass << Field; + Diag(Field->getEndLoc(), + diag::note_default_member_initializer_not_yet_parsed); + // Recover by marking the field invalid, unless we're in a SFINAE context. 
+ if (!isSFINAEContext()) + Field->setInvalidDecl(); + return ExprError(); +} + void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) { if (VD->isInvalidDecl()) return; // If initializing the variable failed, don't also diagnose problems with diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index ec67a6b6f28e7..2493b4a76d5e1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5856,10 +5856,8 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, - ParmVarDecl *Param, Expr *RewrittenInit, - bool SkipImmediateInvocations) { + ParmVarDecl *Param) { if (Param->hasUnparsedDefaultArg()) { - assert(!RewrittenInit && "Should not have a rewritten init expression yet"); // If we've already cleared out the location for the default argument, // that means we're parsing it right now. if (!UnparsedDefaultArgLocs.count(Param)) { @@ -5876,14 +5874,11 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, return true; } - if (Param->hasUninstantiatedDefaultArg()) { - assert(!RewrittenInit && "Should not have a rewitten init expression yet"); - if (InstantiateDefaultArgument(CallLoc, FD, Param)) - return true; - } + if (Param->hasUninstantiatedDefaultArg() && + InstantiateDefaultArgument(CallLoc, FD, Param)) + return true; - Expr *Init = RewrittenInit ? RewrittenInit : Param->getInit(); - assert(Init && "default argument but no initializer?"); + assert(Param->hasInit() && "default argument but no initializer?"); // If the default expression creates temporaries, we need to // push them to the current stack of expression temporaries so they'll @@ -5892,238 +5887,34 @@ bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD, // bound temporaries; see the comment in PR5810. 
// We don't need to do that with block decls, though, because // blocks in default argument expression can never capture anything. - if (auto *InitWithCleanup = dyn_cast(Init)) { + if (auto Init = dyn_cast(Param->getInit())) { // Set the "needs cleanups" bit regardless of whether there are // any explicit objects. - Cleanup.setExprNeedsCleanups(InitWithCleanup->cleanupsHaveSideEffects()); + Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects()); + // Append all the objects to the cleanup list. Right now, this // should always be a no-op, because blocks in default argument // expressions should never be able to capture anything. - assert(!InitWithCleanup->getNumObjects() && + assert(!Init->getNumObjects() && "default argument expression has capturing blocks?"); } + + // We already type-checked the argument, so we know it works. + // Just mark all of the declarations in this potentially-evaluated expression + // as being "referenced". EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); - ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer = - SkipImmediateInvocations; - MarkDeclarationsReferencedInExpr(Init, /*SkipLocalVariables*/ true); + MarkDeclarationsReferencedInExpr(Param->getDefaultArg(), + /*SkipLocalVariables=*/true); return false; } -struct ImmediateCallVisitor : public RecursiveASTVisitor { - bool HasImmediateCalls = false; - - bool VisitCallExpr(CallExpr *E) { - if (const FunctionDecl *FD = E->getDirectCallee()) - HasImmediateCalls |= FD->isConsteval(); - return RecursiveASTVisitor::VisitStmt(E); - } - - // SourceLocExpr are not immediate invocations - // but CXXDefaultInitExpr/CXXDefaultArgExpr containing a SourceLocExpr - // need to be rebuilt so that they refer to the correct SourceLocation and - // DeclContext. 
- bool VisitSourceLocExpr(SourceLocExpr *E) { - HasImmediateCalls = true; - return RecursiveASTVisitor::VisitStmt(E); - } - - // A nested lambda might have parameters with immediate invocations - // in their default arguments. - // The compound statement is not visited (as it does not constitute a - // subexpression). - // FIXME: We should consider visiting and transforming captures - // with init expressions. - bool VisitLambdaExpr(LambdaExpr *E) { - return VisitCXXMethodDecl(E->getCallOperator()); - } - - // Blocks don't support default parameters, and, as for lambdas, - // we don't consider their body a subexpression. - bool VisitBlockDecl(BlockDecl *B) { return false; } - - bool VisitCompoundStmt(CompoundStmt *B) { - return false; - } - - bool VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { - return TraverseStmt(E->getExpr()); - } - - bool VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { - return TraverseStmt(E->getExpr()); - } -}; - -struct EnsureImmediateInvocationInDefaultArgs - : TreeTransform { - EnsureImmediateInvocationInDefaultArgs(Sema &SemaRef) - : TreeTransform(SemaRef) {} - - // Lambda can only have immediate invocations in the default - // args of their parameters, which is transformed upon calling the closure. - // The body is not a subexpression, so we have nothing to do. - // FIXME: Immediate calls in capture initializers should be transformed. 
- ExprResult TransformLambdaExpr(LambdaExpr *E) { return E; } - ExprResult TransformBlockExpr(BlockExpr *E) { return E; } -}; - ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc, - FunctionDecl *FD, ParmVarDecl *Param, - Expr *Init) { + FunctionDecl *FD, ParmVarDecl *Param) { assert(Param->hasDefaultArg() && "can't build nonexistent default arg"); - - bool NestedDefaultChecking = - isCheckingDefaultArgumentOrInitializerOfOuterEntity(); - - llvm::Optional - InitializationContext = - OutermostDeclarationWithDelayedImmediateInvocations(); - if (!InitializationContext.has_value()) - InitializationContext.emplace(CallLoc, Param, CurContext); - - if (!Init && !Param->hasUnparsedDefaultArg()) { - // Mark that we are replacing a default argument first. - // If we are instantiating a template we won't have to - // retransform immediate calls. - EnterExpressionEvaluationContext EvalContext( - *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); - ExprEvalContexts.back().DelayedDefaultInitializationContext = { - CallLoc, Param, CurContext}; - - if (Param->hasUninstantiatedDefaultArg()) { - if (InstantiateDefaultArgument(CallLoc, FD, Param)) - return ExprError(); - } else { - // CWG2631 - // An immediate invocation that is not evaluated where it appears is - // evaluated and checked for whether it is a constant expression at the - // point where the enclosing initializer is used in a function call. 
- ImmediateCallVisitor V; - if (!NestedDefaultChecking) - V.TraverseDecl(Param); - if (V.HasImmediateCalls) { - EnsureImmediateInvocationInDefaultArgs Immediate(*this); - ExprResult Res = Immediate.TransformExpr(Param->getInit()); - if (Res.isInvalid()) - return ExprError(); - Res = ConvertParamDefaultArgument(Param, Res.get(), - Res.get()->getBeginLoc()); - if (Res.isInvalid()) - return ExprError(); - Init = Res.get(); - } - } - } - - if (CheckCXXDefaultArgExpr( - CallLoc, FD, Param, Init, - /*SkipImmediateInvocations=*/NestedDefaultChecking)) + if (CheckCXXDefaultArgExpr(CallLoc, FD, Param)) return ExprError(); - - return CXXDefaultArgExpr::Create(Context, InitializationContext->Loc, Param, - Init, InitializationContext->Context); -} - -ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - assert(Field->hasInClassInitializer()); - - // If we might have already tried and failed to instantiate, don't try again. - if (Field->isInvalidDecl()) - return ExprError(); - - auto *ParentRD = cast(Field->getParent()); - - llvm::Optional - InitializationContext = - OutermostDeclarationWithDelayedImmediateInvocations(); - if (!InitializationContext.has_value()) - InitializationContext.emplace(Loc, Field, CurContext); - - Expr *Init = nullptr; - - bool NestedDefaultChecking = - isCheckingDefaultArgumentOrInitializerOfOuterEntity(); - - if (!Field->getInClassInitializer()) { - // Maybe we haven't instantiated the in-class initializer. Go check the - // pattern FieldDecl to see if it has one. 
- if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) { - CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern(); - DeclContext::lookup_result Lookup = - ClassPattern->lookup(Field->getDeclName()); - - FieldDecl *Pattern = nullptr; - for (auto *L : Lookup) { - if ((Pattern = dyn_cast(L))) - break; - } - assert(Pattern && "We must have set the Pattern!"); - if (!Pattern->hasInClassInitializer() || - InstantiateInClassInitializer(Loc, Field, Pattern, - getTemplateInstantiationArgs(Field))) { - Field->setInvalidDecl(); - return ExprError(); - } - } - } else { - // CWG2631 - // An immediate invocation that is not evaluated where it appears is - // evaluated and checked for whether it is a constant expression at the - // point where the enclosing initializer is used in a [...] a constructor - // definition, or an aggregate initialization. - EnterExpressionEvaluationContext EvalContext( - *this, ExpressionEvaluationContext::PotentiallyEvaluated, Field); - ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field, - CurContext}; - - ImmediateCallVisitor V; - if (!NestedDefaultChecking) - V.TraverseDecl(Field); - if (V.HasImmediateCalls) { - EnsureImmediateInvocationInDefaultArgs Immediate(*this); - ExprResult Res = Immediate.TransformExpr(Field->getInClassInitializer()); - if (!Res.isInvalid()) - Res = ConvertMemberDefaultInitExpression(Field, Res.get(), Loc); - if (!Res.isInvalid()) - Res = MaybeCreateExprWithCleanups(Res.get()); - if (Res.isInvalid()) { - Field->setInvalidDecl(); - return ExprError(); - } - Init = Res.get(); - } else if (!NestedDefaultChecking) { - MarkDeclarationsReferencedInExpr(Field->getInClassInitializer()); - } - } - if (Field->getInClassInitializer()) - return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc, - Field, InitializationContext->Context, - Init); - - // DR1351: - // If the brace-or-equal-initializer of a non-static data member - // invokes a defaulted default 
constructor of its class or of an - // enclosing class in a potentially evaluated subexpression, the - // program is ill-formed. - // - // This resolution is unworkable: the exception specification of the - // default constructor can be needed in an unevaluated context, in - // particular, in the operand of a noexcept-expression, and we can be - // unable to compute an exception specification for an enclosed class. - // - // Any attempt to resolve the exception specification of a defaulted default - // constructor before the initializer is lexically complete will ultimately - // come here at which point we can diagnose it. - RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext(); - Diag(Loc, diag::err_default_member_initializer_not_yet_parsed) - << OutermostClass << Field; - Diag(Field->getEndLoc(), - diag::note_default_member_initializer_not_yet_parsed); - // Recover by marking the field invalid, unless we're in a SFINAE context. - if (!isSFINAEContext()) - Field->setInvalidDecl(); - return ExprError(); + return CXXDefaultArgExpr::Create(Context, CallLoc, Param, CurContext); } Sema::VariadicCallType @@ -17748,7 +17539,6 @@ void Sema::CheckUnusedVolatileAssignment(Expr *E) { ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) { if (isUnevaluatedContext() || !E.isUsable() || !Decl || !Decl->isConsteval() || isConstantEvaluated() || - isCheckingDefaultArgumentOrInitializer() || RebuildingImmediateInvocation || isImmediateFunctionContext()) return E; @@ -17794,14 +17584,8 @@ static void EvaluateAndDiagnoseImmediateInvocation( FD = Call->getConstructor(); else llvm_unreachable("unhandled decl kind"); - assert(FD && FD->isConsteval()); + assert(FD->isConsteval()); SemaRef.Diag(CE->getBeginLoc(), diag::err_invalid_consteval_call) << FD; - if (auto Context = - SemaRef.InnermostDeclarationWithDelayedImmediateInvocations()) { - SemaRef.Diag(Context->Loc, diag::note_invalid_consteval_initializer) - << Context->Decl; - 
SemaRef.Diag(Context->Decl->getBeginLoc(), diag::note_declared_at); - } for (auto &Note : Notes) SemaRef.Diag(Note.first, Note.second); return; @@ -19947,8 +19731,7 @@ void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) { if (auto *FD = dyn_cast(E->getDecl())) if (!isUnevaluatedContext() && !isConstantEvaluated() && - !isImmediateFunctionContext() && - !isCheckingDefaultArgumentOrInitializer() && FD->isConsteval() && + !isImmediateFunctionContext() && FD->isConsteval() && !RebuildingImmediateInvocation && !FD->isDependentContext()) ExprEvalContexts.back().ReferenceToConsteval.insert(E); MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse, diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 709162e01809b..9e41dfbfdbe95 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1978,9 +1978,9 @@ ExprResult TemplateInstantiator::TransformCXXDefaultArgExpr( assert(!cast(E->getParam()->getDeclContext())-> getDescribedFunctionTemplate() && "Default arg expressions are never formed in dependent cases."); - return SemaRef.BuildCXXDefaultArgExpr( - E->getUsedLocation(), cast(E->getParam()->getDeclContext()), - E->getParam()); + return SemaRef.BuildCXXDefaultArgExpr(E->getUsedLocation(), + cast(E->getParam()->getDeclContext()), + E->getParam()); } template @@ -3407,8 +3407,6 @@ bool Sema::InstantiateInClassInitializer( ContextRAII SavedContext(*this, Instantiation->getParent()); EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); - ExprEvalContexts.back().DelayedDefaultInitializationContext = { - PointOfInstantiation, Instantiation, CurContext}; LocalInstantiationScope Scope(*this, true); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index ead72463aca78..ab34a9d611b9c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ 
-3172,10 +3172,9 @@ class TreeTransform { /// By default, builds a new default-argument expression, which does not /// require any semantic analysis. Subclasses may override this routine to /// provide different behavior. - ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param, - Expr *RewrittenExpr) { + ExprResult RebuildCXXDefaultArgExpr(SourceLocation Loc, ParmVarDecl *Param) { return CXXDefaultArgExpr::Create(getSema().Context, Loc, Param, - RewrittenExpr, getSema().CurContext); + getSema().CurContext); } /// Build a new C++11 default-initialization expression. @@ -3185,7 +3184,8 @@ class TreeTransform { /// routine to provide different behavior. ExprResult RebuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) { - return getSema().BuildCXXDefaultInitExpr(Loc, Field); + return CXXDefaultInitExpr::Create(getSema().Context, Loc, Field, + getSema().CurContext); } /// Build a new C++ zero-initialization expression. @@ -12094,20 +12094,11 @@ TreeTransform::TransformCXXDefaultArgExpr(CXXDefaultArgExpr *E) { if (!Param) return ExprError(); - ExprResult InitRes; - if (E->hasRewrittenInit()) { - InitRes = getDerived().TransformExpr(E->getRewrittenExpr()); - if (InitRes.isInvalid()) - return ExprError(); - } - if (!getDerived().AlwaysRebuild() && Param == E->getParam() && - E->getUsedContext() == SemaRef.CurContext && - InitRes.get() == E->getRewrittenExpr()) + E->getUsedContext() == SemaRef.CurContext) return E; - return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param, - InitRes.get()); + return getDerived().RebuildCXXDefaultArgExpr(E->getUsedLocation(), Param); } template diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 08f9f0bf50d03..2a3c6e7231785 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1824,9 +1824,6 @@ void ASTStmtReader::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { E->Param = readDeclAs(); 
E->UsedContext = readDeclAs(); E->CXXDefaultArgExprBits.Loc = readSourceLocation(); - E->CXXDefaultArgExprBits.HasRewrittenInit = Record.readInt(); - if (E->CXXDefaultArgExprBits.HasRewrittenInit) - *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { @@ -1834,9 +1831,6 @@ void ASTStmtReader::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { E->Field = readDeclAs(); E->UsedContext = readDeclAs(); E->CXXDefaultInitExprBits.Loc = readSourceLocation(); - E->CXXDefaultInitExprBits.HasRewrittenInit = Record.readInt(); - if (E->CXXDefaultInitExprBits.HasRewrittenInit) - *E->getTrailingObjects() = Record.readSubExpr(); } void ASTStmtReader::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { @@ -3835,13 +3829,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case EXPR_CXX_DEFAULT_ARG: - S = CXXDefaultArgExpr::CreateEmpty( - Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); + S = new (Context) CXXDefaultArgExpr(Empty); break; case EXPR_CXX_DEFAULT_INIT: - S = CXXDefaultInitExpr::CreateEmpty( - Context, /*HasRewrittenInit=*/Record[ASTStmtReader::NumExprFields]); + S = new (Context) CXXDefaultInitExpr(Empty); break; case EXPR_CXX_BIND_TEMPORARY: diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 6e4101ac122ee..e2ba69ca1eec8 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1745,9 +1745,6 @@ void ASTStmtWriter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) { Record.AddDeclRef(E->getParam()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); Record.AddSourceLocation(E->getUsedLocation()); - Record.push_back(E->hasRewrittenInit()); - if (E->hasRewrittenInit()) - Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_ARG; } @@ -1756,9 +1753,6 @@ void ASTStmtWriter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) { 
Record.AddDeclRef(E->getField()); Record.AddDeclRef(cast_or_null(E->getUsedContext())); Record.AddSourceLocation(E->getExprLoc()); - Record.push_back(E->hasRewrittenInit()); - if (E->hasRewrittenInit()) - Record.AddStmt(E->getRewrittenExpr()); Code = serialization::EXPR_CXX_DEFAULT_INIT; } diff --git a/clang/test/CXX/class/class.local/p1-0x.cpp b/clang/test/CXX/class/class.local/p1-0x.cpp index 096f5080099ec..49125f5f9b062 100644 --- a/clang/test/CXX/class/class.local/p1-0x.cpp +++ b/clang/test/CXX/class/class.local/p1-0x.cpp @@ -11,8 +11,8 @@ void f() { int x = 3; // expected-note{{'x' declared here}} struct C { int& x2 = x; // expected-error{{reference to local variable 'x' declared in enclosing lambda expression}} - }c; // expected-note {{required here}} + }; }; - C(); // expected-note {{required here}} + C(); } diff --git a/clang/test/CodeGenCXX/builtin-source-location.cpp b/clang/test/CodeGenCXX/builtin-source-location.cpp index 7af6749d0d6d6..6e44e6b0e60e3 100644 --- a/clang/test/CodeGenCXX/builtin-source-location.cpp +++ b/clang/test/CodeGenCXX/builtin-source-location.cpp @@ -1,6 +1,4 @@ // RUN: %clang_cc1 -no-opaque-pointers -std=c++2a -fblocks %s -triple x86_64-unknown-unknown -emit-llvm -o %t.ll -// RUN: %clang_cc1 -no-opaque-pointers -std=c++14 -fblocks %s -triple x86_64-unknown-unknown -emit-llvm -o %t.ll - // This needs to be performed before #line directives which alter filename // RUN: %clang_cc1 -no-opaque-pointers -fno-file-reproducible -fmacro-prefix-map=%p=/UNLIKELY/PATH -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-PREFIX-MAP diff --git a/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp b/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp deleted file mode 100644 index 54a02ffc06836..0000000000000 --- a/clang/test/CodeGenCXX/default-arguments-with-immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %clang_cc1 -std=c++2a -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s - -consteval int immediate() { return 0;} 
-static int ext(); -void f(int a = immediate() + ext()); - -void test_function() { - f(); - f(0); - // CHECK: call noundef i32 @_ZL3extv() - // CHECK: add - // CHECK: call {{.*}} @_Z1fi - // CHECK: call {{.*}} @_Z1fi -} - -// CHECK: define {{.*}} i32 @_ZL3extv() - -static constexpr int not_immediate(); -struct A { - int a = immediate() + not_immediate(); -}; - -void test_member() { - // CHECK: call void @_ZN1AC2Ev - A defaulted; - // CHECK-NOT: call void @_ZN1AC2Ev - A provided{0}; -} - -// CHECK: define {{.*}} void @_ZN1AC2Ev{{.*}} -// CHECK: %call = call noundef i32 @_ZL13not_immediatev() - -int never_referenced() {return 42;}; - - -namespace not_used { - -struct A { - int a = immediate() + never_referenced(); -}; -void f(int a = immediate() + never_referenced()); - -void g() { - A a{0}; - f(0); -} - -} - -static int ext() {return 0;} -static constexpr int not_immediate() {return 0;} - -// CHECK-NOT: define {{.*}} i32 _ZL16never_referencedv()( -// CHECK: define {{.*}} i32 @_ZL13not_immediatev() diff --git a/clang/test/PCH/default-argument-with-immediate-calls.cpp b/clang/test/PCH/default-argument-with-immediate-calls.cpp deleted file mode 100644 index 510605a23d4e7..0000000000000 --- a/clang/test/PCH/default-argument-with-immediate-calls.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// RUN: %clang_cc1 -std=c++20 -emit-pch %s -o %t -// RUN: %clang_cc1 -std=c++20 -include-pch %t -verify %s -// expected-no-diagnostics - -#ifndef HEADER_INCLUDED -#define HEADER_INCLUDED - -consteval int immediate(); -int regular_function() { - return 0; -} - -struct S { - int a = immediate() + regular_function(); -}; - -int f(int arg = immediate()) { - return arg; -} - -#else - -consteval int immediate() { - return 0; -} - -void test() { - f(0); - f(); - S s{0}; - S t{0}; -} - -#endif diff --git a/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp b/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp deleted file mode 100644 index 511306e0d921a..0000000000000 --- 
a/clang/test/SemaCXX/cxx2a-consteval-default-params.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s -// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2b %s - -consteval int undefined(); // expected-note 4 {{declared here}} - -void check_lambdas_unused( - int a = [] - { - // The body of a lambda is not a subexpression of the lambda - // so this is immediately evaluated even if the parameter - // is never used. - return undefined(); // expected-error {{not a constant expression}} \ - // expected-note {{undefined function 'undefined'}} - }(), - int b = [](int no_error = undefined()) { - return no_error; - }(0), - int c = [](int defaulted = undefined()) { - return defaulted; - }() -) {} - -int check_lambdas_used( - int b = [](int no_error = undefined()) { - return no_error; - }(0), - int c = [](int defaulted = undefined()) { // expected-error {{not a constant expression}} \ - // expected-note {{declared here}} \ - // expected-note {{undefined function 'undefined'}} - return defaulted; - }(), // expected-note {{in the default initalizer of 'defaulted'}} - int d = [](int defaulted = sizeof(undefined())) { - return defaulted; - }() -) { - return 0; -} - -int test_check_lambdas_used = check_lambdas_used(); - -struct UnusedInitWithLambda { - int a = [] { - return undefined(); // expected-error {{not a constant expression}} \ - // expected-note {{undefined function 'undefined'}} - }(); - // UnusedInitWithLambda is never constructed, so the initializer - // of b and undefined() are never evaluated. 
- int b = [](int no_error = undefined()) { - return no_error; - }(); -}; - -consteval int ub(int n) { - return 0/n; // expected-note {{division}} -} - -struct InitWithLambda { - int b = [](int error = undefined()) { // expected-error {{not a constant expression}} \ - // expected-note {{declared here}} \ - // expected-note {{undefined function 'undefined'}} - return error; - }(); // expected-note {{in the default initalizer of 'error'}} - int c = [](int error = sizeof(undefined()) + ub(0)) { // expected-error {{'ub' is not a constant expression}} \ - // expected-note {{declared here}} \ - // expected-note {{in call to 'ub(0)}} - return error; - }(); // expected-note {{in the default initalizer of 'error'}} -} i; // expected-note {{in implicit default constructor}} diff --git a/clang/test/SemaCXX/source_location.cpp b/clang/test/SemaCXX/source_location.cpp index 9cfe9207dd14d..ccb385f60dc4b 100644 --- a/clang/test/SemaCXX/source_location.cpp +++ b/clang/test/SemaCXX/source_location.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -std=c++1z -fcxx-exceptions -fexceptions -verify %s -// RUN: %clang_cc1 -std=c++2a -fcxx-exceptions -DUSE_CONSTEVAL -fexceptions -verify %s // expected-no-diagnostics #define assert(...) ((__VA_ARGS__) ? 
((void)0) : throw 42) @@ -9,22 +8,15 @@ template struct Printer; -#ifdef USE_CONSTEVAL -#define SOURCE_LOC_EVAL_KIND consteval -#else -#define SOURCE_LOC_EVAL_KIND constexpr -#endif - namespace std { class source_location { struct __impl; public: - static SOURCE_LOC_EVAL_KIND source_location - current(const __impl *__p = __builtin_source_location()) noexcept { - source_location __loc; - __loc.__m_impl = __p; - return __loc; + static constexpr source_location current(const __impl *__p = __builtin_source_location()) noexcept { + source_location __loc; + __loc.__m_impl = __p; + return __loc; } constexpr source_location() = default; constexpr source_location(source_location const &) = default; @@ -601,51 +593,3 @@ namespace TestConstexprContext { } static_assert(test()); } - -namespace Lambda { -#line 8000 "TestLambda.cpp" -constexpr int nested_lambda(int l = []{ - return SL::current().line(); -}()) { - return l; -} -static_assert(nested_lambda() == __LINE__ - 4); - -constexpr int lambda_param(int l = [](int l = SL::current().line()) { - return l; -}()) { - return l; -} -static_assert(lambda_param() == __LINE__); - - -} - -constexpr int compound_literal_fun(int a = - (int){ SL::current().line() } -) { return a ;} -static_assert(compound_literal_fun() == __LINE__); - -struct CompoundLiteral { - int a = (int){ SL::current().line() }; -}; -static_assert(CompoundLiteral{}.a == __LINE__); - - -// FIXME -// Init captures are subexpressions of the lambda expression -// so according to the standard immediate invocations in init captures -// should be evaluated at the call site. -// However Clang does not yet implement this as it would introduce -// a fair bit of complexity. -// We intend to implement that functionality once we find real world -// use cases that require it. 
-constexpr int test_init_capture(int a = - [b = SL::current().line()] { return b; }()) { - return a; -} -#ifdef USE_CONSTEVAL -static_assert(test_init_capture() == __LINE__ - 4); -#else -static_assert(test_init_capture() == __LINE__ ); -#endif From 4ec7dff27d90d7abe263cfae115cc959c05f0080 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Fri, 4 Nov 2022 12:37:07 -0700 Subject: [PATCH 283/516] [CMake] Only set LLVM_DEFAULT_TARGET_TRIPLE to LLVM_HOST_TRIPLE when native target is enabled This is for case when native target like X86 is not in LLVM_TARGETS_TO_BUILD. Right now LLVM_DEFAULT_TARGET_TRIPLE is set to LLVM_HOST_TRIPLE even when native target is not enabled, As a result, many lit tests will fail because default_triple is set for lit test but not enabled when build LLVM. Reviewed By: smeenai Differential Revision: https://reviews.llvm.org/D134972 --- llvm/CMakeLists.txt | 5 ++++- .../Examples/OrcV2Examples/lljit-with-remote-debugging.test | 2 ++ .../Examples/OrcV2Examples/lljit-with-thinlto-summaries.test | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 62a3fe96cb937..aafdbbe0ef298 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -837,7 +837,10 @@ include(config-ix) if("${LLVM_HOST_TRIPLE}" MATCHES "^powerpc64-ibm-aix") string(REGEX REPLACE "^powerpc64" "powerpc" LLVM_DEFAULT_TARGET_TRIPLE_default "${LLVM_HOST_TRIPLE}") else() - set(LLVM_DEFAULT_TARGET_TRIPLE_default "${LLVM_HOST_TRIPLE}") + # Only set default triple when native target is enabled. 
+ if (LLVM_NATIVE_TARGET) + set(LLVM_DEFAULT_TARGET_TRIPLE_default "${LLVM_HOST_TRIPLE}") + endif() endif() set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE_default}" CACHE STRING diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test index a09d72a76bef9..d34208136c3e2 100644 --- a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test +++ b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test @@ -1,6 +1,8 @@ # This test makes sure that the example builds and executes as expected. # Instructions for debugging can be found in LLJITWithRemoteDebugging.cpp +# REQUIRES: default_triple + # RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1_elf.ll | FileCheck --check-prefix=CHECK0 %s # CHECK0: Parsing input IR code from: {{.*}}/Inputs/argc_sub1_elf.ll # CHECK0: Running: main() diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test index f82ac41bce38d..21112b825ba5b 100644 --- a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test +++ b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test @@ -2,6 +2,8 @@ # RUN: opt -module-summary %p/Inputs/foo-mod.ll -o foo-mod.bc # RUN: opt -module-summary %p/Inputs/bar-mod.ll -o bar-mod.bc +# REQUIRES: default_triple + # RUN: llvm-lto -thinlto -o main-foo-bar main-mod.bc foo-mod.bc bar-mod.bc # RUN: LLJITWithThinLTOSummaries main-foo-bar.thinlto.bc 2>&1 | FileCheck %s From ce90957461d5d5e4290a61267b4726d3842483d7 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Fri, 4 Nov 2022 17:37:18 -0400 Subject: [PATCH 284/516] [mlir][spirv] Fold noop `BitcastsOp`s This allows for bitcast conversion to roundtrip. 
Fixes: https://github.com/llvm/llvm-project/issues/58801 Reviewed By: antiagainst, Hardcode84, mravishankar Differential Revision: https://reviews.llvm.org/D137459 --- .../mlir/Dialect/SPIRV/IR/SPIRVCastOps.td | 2 +- .../SPIRV/IR/SPIRVCanonicalization.cpp | 20 +++++++++++++--- .../Dialect/SPIRV/IR/SPIRVCanonicalization.td | 7 ------ .../SPIRV/Transforms/canonicalize.mlir | 24 +++++++++++++++++++ 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td index c985c6e94e19e..8975fa01df403 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td @@ -88,7 +88,7 @@ def SPIRV_BitcastOp : SPIRV_Op<"Bitcast", [Pure]> { let assemblyFormat = [{ $operand attr-dict `:` type($operand) `to` type($result) }]; - let hasCanonicalizer = 1; + let hasFolder = 1; } // ----- diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp index b3444d8b210a6..57e6475548642 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp @@ -116,9 +116,23 @@ void spirv::AccessChainOp::getCanonicalizationPatterns( // spirv.BitcastOp //===----------------------------------------------------------------------===// -void spirv::BitcastOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add(context); +OpFoldResult spirv::BitcastOp::fold(ArrayRef /*operands*/) { + Value arg = getOperand(); + if (getType() == arg.getType()) + return arg; + + // Look through nested bitcasts. + if (auto bitcast = arg.getDefiningOp()) { + Value nestedArg = bitcast.getOperand(); + if (nestedArg.getType() == getType()) + return nestedArg; + + getOperandMutable().assign(nestedArg); + return getResult(); + } + + // TODO(kuhar): Consider constant-folding the operand attribute. 
+ return getResult(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td index 12c41fcaf0f00..e8d2274d29aa0 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td @@ -13,13 +13,6 @@ include "mlir/IR/PatternBase.td" include "mlir/Dialect/SPIRV/IR/SPIRVOps.td" -//===----------------------------------------------------------------------===// -// spirv.Bitcast -//===----------------------------------------------------------------------===// - -def ConvertChainedBitcast : Pat<(SPIRV_BitcastOp (SPIRV_BitcastOp $operand)), - (SPIRV_BitcastOp $operand)>; - //===----------------------------------------------------------------------===// // spirv.LogicalNot //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir index b13d6443850c9..e65f92e66bb47 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir @@ -86,6 +86,30 @@ func.func @convert_bitcast_multi_use(%arg0 : vector<2xf32>, %arg1 : !spirv.ptr i64 { + // CHECK: spirv.ReturnValue %[[ARG]] + %0 = spirv.Bitcast %arg0 : i64 to f64 + %1 = spirv.Bitcast %0 : f64 to i64 + spirv.ReturnValue %1 : i64 +} + +// ----- + +// CHECK-LABEL: @convert_bitcast_chained_roundtip +// CHECK-SAME: %[[ARG:.+]]: i64 +func.func @convert_bitcast_chained_roundtip(%arg0 : i64) -> i64 { + // CHECK: spirv.ReturnValue %[[ARG]] + %0 = spirv.Bitcast %arg0 : i64 to f64 + %1 = spirv.Bitcast %0 : f64 to vector<2xi32> + %2 = spirv.Bitcast %1 : vector<2xi32> to vector<2xf32> + %3 = spirv.Bitcast %2 : vector<2xf32> to i64 + spirv.ReturnValue %3 : i64 +} + +// ----- + 
//===----------------------------------------------------------------------===// // spirv.CompositeExtract //===----------------------------------------------------------------------===// From 7b7ec60dccb5f44d33c36e89bd4df008c618468b Mon Sep 17 00:00:00 2001 From: Wanyi Ye Date: Fri, 4 Nov 2022 12:01:00 -0700 Subject: [PATCH 285/516] Fix test TestVSCode_terminatedEvent.py This test is broken due to the flaky encoding of top-level JSON key 'memory' When I run locally (linux) the test passed. However, it failed the build bot: https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/48111/ I will find a way to repro before I can actually fix this issue correctly. https://reviews.llvm.org/D137455 --- .../lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py b/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py index bc516a0ed0e37..a288012530881 100644 --- a/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py +++ b/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py @@ -49,8 +49,6 @@ def test_terminated_event(self): self.assertTrue(statistics['totalDebugInfoEnabled'] > 0) self.assertTrue(statistics['totalModuleCountHasDebugInfo'] > 0) - self.assertIsNotNone(statistics['memory']) - # lldb-vscode debugs one target at a time target = json.loads(statistics['targets'])[0] self.assertTrue(target['totalBreakpointResolveTime'] > 0) From 09d38dd7704a52e8ad2d5f8f39aaeccf107f4c56 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Oct 2022 22:41:41 -0700 Subject: [PATCH 286/516] AMDGPU: Fix assert when trying to overextend liverange This was trying to add segments beyond the new and use, so skip additional segments. This would hit (S < E && "Cannot create empty or backwards segment"). 
--- .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 2 +- ...cannot-create-empty-or-backward-segment.ll | 176 ++++++++++++++++++ ...pt-exec-masking-pre-ra-update-liveness.mir | 103 ++++++++++ 3 files changed, 280 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index aed84437b8908..85de3a5484111 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -226,7 +226,7 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot()); assert(DefSegment != SelLI->end() && "No live interval segment covering definition?"); - for (auto I = DefSegment; I != SelLI->end(); ++I) { + for (auto I = DefSegment; I != SelLI->end() && I->start <= AndIdx; ++I) { SlotIndex Start = I->start < SelIdx.getRegSlot() ? SelIdx.getRegSlot() : I->start; SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ? 
diff --git a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll new file mode 100644 index 0000000000000..95d7cbb82fb77 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck %s + +define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1 %arg1, i1 %arg2, i1 %arg3, i1 %arg4, i1 %arg5) { +; CHECK-LABEL: cannot_create_empty_or_backwards_segment: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_mov_b64 s[26:27], s[2:3] +; CHECK-NEXT: s_mov_b64 s[24:25], s[0:1] +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: s_add_u32 s24, s24, s7 +; CHECK-NEXT: s_addc_u32 s25, s25, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_bitcmp1_b32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[14:15], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s0, 8 +; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s0, 16 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s0, 24 +; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT: s_xor_b64 s[2:3], s[6:7], -1 +; CHECK-NEXT: s_bitcmp1_b32 s1, 0 +; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s1, 8 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[14:15] +; CHECK-NEXT: s_cselect_b64 s[12:13], -1, 0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1 +; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3] +; CHECK-NEXT: s_and_b64 s[4:5], exec, s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: s_branch .LBB0_3 +; CHECK-NEXT: .LBB0_1: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[18:19], -1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_mov_b64 s[22:23], 
-1 +; CHECK-NEXT: .LBB0_2: ; %Flow7 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_and_b64 vcc, exec, s[22:23] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_12 +; CHECK-NEXT: .LBB0_3: ; %bb7 +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_1 +; CHECK-NEXT: ; %bb.4: ; %bb8 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 vcc, s[2:3] +; CHECK-NEXT: s_cbranch_vccz .LBB0_6 +; CHECK-NEXT: ; %bb.5: ; %bb9 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[18:19], 0 +; CHECK-NEXT: s_mov_b64 s[16:17], -1 +; CHECK-NEXT: s_mov_b64 s[22:23], s[8:9] +; CHECK-NEXT: s_cbranch_execz .LBB0_7 +; CHECK-NEXT: s_branch .LBB0_8 +; CHECK-NEXT: .LBB0_6: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[18:19], -1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[22:23], 0 +; CHECK-NEXT: .LBB0_7: ; %bb10 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[18:19], 0 +; CHECK-NEXT: s_mov_b64 s[16:17], -1 +; CHECK-NEXT: s_mov_b64 s[22:23], s[12:13] +; CHECK-NEXT: .LBB0_8: ; %Flow9 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_andn2_b64 vcc, exec, s[22:23] +; CHECK-NEXT: s_mov_b64 s[22:23], -1 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 +; CHECK-NEXT: ; %bb.9: ; %bb13 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 vcc, s[4:5] +; CHECK-NEXT: s_cbranch_vccz .LBB0_11 +; CHECK-NEXT: ; %bb.10: ; %bb16 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_mov_b64 s[22:23], s[10:11] +; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17] +; CHECK-NEXT: s_branch .LBB0_2 +; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[22:23], -1 +; CHECK-NEXT: s_mov_b64 s[20:21], 0 +; CHECK-NEXT: ; implicit-def: $sgpr16_sgpr17 +; CHECK-NEXT: s_mov_b64 
s[18:19], s[16:17] +; CHECK-NEXT: s_branch .LBB0_2 +; CHECK-NEXT: .LBB0_12: ; %loop.exit.guard6 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_xor_b64 s[14:15], s[20:21], -1 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_and_b64 vcc, exec, s[14:15] +; CHECK-NEXT: s_cbranch_vccz .LBB0_16 +; CHECK-NEXT: ; %bb.13: ; %bb14 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_andn2_b64 vcc, exec, s[14:15] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_15 +; CHECK-NEXT: ; %bb.14: ; %bb15 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:4 +; CHECK-NEXT: buffer_store_dword v1, off, s[24:27], 0 +; CHECK-NEXT: .LBB0_15: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[20:21], 0 +; CHECK-NEXT: .LBB0_16: ; %Flow13 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_3 +; CHECK-NEXT: ; %bb.17: ; %loop.exit.guard +; CHECK-NEXT: s_and_b64 vcc, exec, s[18:19] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_22 +; CHECK-NEXT: ; %bb.18: ; %loop.exit.guard5 +; CHECK-NEXT: s_and_b64 vcc, exec, s[16:17] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_22 +; CHECK-NEXT: ; %bb.19: ; %bb17 +; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7] +; CHECK-NEXT: s_cbranch_vccz .LBB0_21 +; CHECK-NEXT: ; %bb.20: ; %bb19 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB0_22 +; CHECK-NEXT: .LBB0_21: ; %bb21 +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_22: ; %UnifiedUnreachableBlock +bb: + br label %bb6 + +bb6: ; preds = %bb15, %bb14, %bb + br label %bb7 + +bb7: ; preds = %bb16, %bb6 + br i1 %arg2, label %bb8, label %bb20 + +bb8: ; preds = %bb7 + br i1 %arg3, label %bb10, label %bb9 + +bb9: ; preds = %bb8 + br i1 %arg1, label %bb13, label %bb12 + +bb10: ; preds = %bb8 + br i1 %arg5, label %bb11, label %bb12 + +bb11: ; preds = %bb10 + br label %bb13 + +bb12: ; preds = %bb10, %bb9 + unreachable + +bb13: ; preds = 
%bb11, %bb9 + br i1 %arg1, label %bb16, label %bb14 + +bb14: ; preds = %bb13 + br i1 %arg, label %bb15, label %bb6 + +bb15: ; preds = %bb14 + store double 0.000000e+00, ptr addrspace(5) null, align 2147483648 + br label %bb6 + +bb16: ; preds = %bb13 + br i1 %arg4, label %bb17, label %bb7 + +bb17: ; preds = %bb16 + br i1 %arg3, label %bb19, label %bb18 + +bb18: ; preds = %bb17 + ret void + +bb19: ; preds = %bb17 + br i1 %arg, label %bb20, label %bb21 + +bb20: ; preds = %bb19, %bb7 + unreachable + +bb21: ; preds = %bb19 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir index 1403f9bd1cf0d..ae2c77ca87039 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir +++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir @@ -686,3 +686,106 @@ body: | bb.3: ... + +# This was trying to extend the liverange of %0 farther than needed, +# following %1's segment to %bb3 + +--- +name: cannot_create_empty_or_backwards_segment +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec + ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: S_ENDPGM 0, implicit 
[[V_CNDMASK_B32_e64_]] + bb.0: + liveins: $sgpr4_sgpr5 + + %0:sreg_64_xexec = COPY $sgpr4_sgpr5 + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec + %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec + $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc + + bb.1: + S_CBRANCH_VCCNZ %bb.3, implicit killed undef $vcc + + bb.2: + S_ENDPGM 0 + + bb.3: + S_ENDPGM 0, implicit %1 +... + +--- +name: cannot_create_empty_or_backwards_segment_2 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment_2 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec + ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit-def dead [[V_CNDMASK_B32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr4_sgpr5 + + bb.1: + liveins: $sgpr4_sgpr5 + + %0:sreg_64_xexec = COPY $sgpr4_sgpr5 + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec + %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec + $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.2, 
implicit killed $vcc + + bb.2: + liveins: $sgpr4_sgpr5 + S_NOP 0, implicit-def %1, implicit %1 + S_CBRANCH_VCCNZ %bb.4, implicit killed undef $vcc + S_BRANCH %bb.1 + + bb.3: + S_ENDPGM 0 + + bb.4: + S_ENDPGM 0 +... From de14befa7730093ff3d46c7628fa1084f251e98d Mon Sep 17 00:00:00 2001 From: Jennifer Yu Date: Wed, 2 Nov 2022 15:43:26 -0700 Subject: [PATCH 287/516] Remove redundant loads. It is caused by regenerate captured var value when processing the has_device_addr, the captured var value has been generated in GenerateOpenMPCapturedVars and passed as Arg in generateInfoForCapture. The fix just use Arg instead regenerated just same as is_device_ptr --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 26 +- .../OpenMP/target_has_device_addr_codegen.cpp | 25 +- .../target_has_device_addr_codegen_01.cpp | 249 +++++++++--------- .../test/mapping/has_device_addr.cpp | 51 ++++ 4 files changed, 185 insertions(+), 166 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index e52989b7c139b..b87e69b641a63 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8739,7 +8739,7 @@ class MappableExprsHandler { // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just // pass its value. 
- if (VD && DevPointersMap.count(VD)) { + if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) { CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.emplace_back(Arg, VD); CombinedInfo.Pointers.push_back(Arg); @@ -8752,30 +8752,6 @@ class MappableExprsHandler { CombinedInfo.Mappers.push_back(nullptr); return; } - if (VD && HasDevAddrsMap.count(VD)) { - auto I = HasDevAddrsMap.find(VD); - CombinedInfo.Exprs.push_back(VD); - Expr *E = nullptr; - for (auto &MCL : I->second) { - E = MCL.begin()->getAssociatedExpression(); - break; - } - llvm::Value *Ptr = nullptr; - if (E->isGLValue()) - Ptr = CGF.EmitLValue(E).getPointer(CGF); - else - Ptr = CGF.EmitScalarExpr(E); - CombinedInfo.BasePointers.emplace_back(Ptr, VD); - CombinedInfo.Pointers.push_back(Ptr); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, - /*isSigned=*/true)); - CombinedInfo.Types.push_back( - (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | - OMP_MAP_TARGET_PARAM); - CombinedInfo.Mappers.push_back(nullptr); - return; - } using MapData = std::tuple Date: Fri, 4 Nov 2022 15:37:18 -0700 Subject: [PATCH 288/516] Revert "[lldb-vscode] Send Statistics Dump in terminated event" This reverts commit e3ccbae309273900a42e30b606c15c873d57f1ea. There is a bug which is failing the test running on mac. 
--- .../test/tools/lldb-vscode/vscode.py | 8 +-- .../lldb-vscode/terminated-event/Makefile | 17 ----- .../TestVSCode_terminatedEvent.py | 61 ---------------- .../lldb-vscode/terminated-event/foo.cpp | 3 - .../tools/lldb-vscode/terminated-event/foo.h | 1 - .../lldb-vscode/terminated-event/main.cpp | 8 --- lldb/tools/lldb-vscode/JSONUtils.cpp | 69 ------------------- lldb/tools/lldb-vscode/JSONUtils.h | 6 -- lldb/tools/lldb-vscode/lldb-vscode.cpp | 4 +- 9 files changed, 3 insertions(+), 174 deletions(-) delete mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/Makefile delete mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py delete mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp delete mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/foo.h delete mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index c2de4ad5c7d9a..d6a6abca53e38 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -369,13 +369,7 @@ def wait_for_stopped(self, timeout=None): def wait_for_exited(self): event_dict = self.wait_for_event('exited') if event_dict is None: - raise ValueError("didn't get exited event") - return event_dict - - def wait_for_terminated(self): - event_dict = self.wait_for_event('terminated') - if event_dict is None: - raise ValueError("didn't get terminated event") + raise ValueError("didn't get stopped event") return event_dict def get_initialize_value(self, key): diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/Makefile b/lldb/test/API/tools/lldb-vscode/terminated-event/Makefile deleted file mode 100644 index b30baf48b972e..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/terminated-event/Makefile +++ /dev/null @@ -1,17 +0,0 
@@ -DYLIB_NAME := foo -DYLIB_CXX_SOURCES := foo.cpp -CXX_SOURCES := main.cpp - -LD_EXTRAS := -Wl,-rpath "-Wl,$(shell pwd)" -USE_LIBDL :=1 - -include Makefile.rules - -all: a.out.stripped - -a.out.stripped: - strip -o a.out.stripped a.out - -ifneq "$(CODESIGN)" "" - $(CODESIGN) -fs - a.out.stripped -endif \ No newline at end of file diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py b/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py deleted file mode 100644 index a288012530881..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -Test lldb-vscode terminated event -""" - -import vscode -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil -import lldbvscode_testcase -import re -import json - -class TestVSCode_terminatedEvent(lldbvscode_testcase.VSCodeTestCaseBase): - - @skipIfWindows - @skipIfRemote - def test_terminated_event(self): - ''' - Terminated Event - Now contains the statistics of a debug session: - metatdata: - totalDebugInfoByteSize > 0 - totalDebugInfoEnabled > 0 - totalModuleCountHasDebugInfo > 0 - ... 
- targetInfo: - totalBreakpointResolveTime > 0 - breakpoints: - recognize function breakpoint - recognize source line breakpoint - It should contains the breakpoints info: function bp & source line bp - ''' - - program_basename = "a.out.stripped" - program = self.getBuildArtifact(program_basename) - self.build_and_launch(program) - # Set breakpoints - functions = ['foo'] - breakpoint_ids = self.set_function_breakpoints(functions) - self.assertEquals(len(breakpoint_ids), len(functions), 'expect one breakpoint') - main_bp_line = line_number('main.cpp', '// main breakpoint 1') - breakpoint_ids.append(self.set_source_breakpoints('main.cpp', [main_bp_line])) - - self.continue_to_breakpoints(breakpoint_ids) - self.continue_to_exit() - - statistics = self.vscode.wait_for_terminated()['statistics'] - self.assertTrue(statistics['totalDebugInfoByteSize'] > 0) - self.assertTrue(statistics['totalDebugInfoEnabled'] > 0) - self.assertTrue(statistics['totalModuleCountHasDebugInfo'] > 0) - - # lldb-vscode debugs one target at a time - target = json.loads(statistics['targets'])[0] - self.assertTrue(target['totalBreakpointResolveTime'] > 0) - - breakpoints = target['breakpoints'] - self.assertIn('foo', - breakpoints[0]['details']['Breakpoint']['BKPTResolver']['Options']['SymbolNames'], - 'foo is a symbol breakpoint') - self.assertTrue(breakpoints[1]['details']['Breakpoint']['BKPTResolver']['Options']['FileName'].endswith('main.cpp'), - 'target has source line breakpoint in main.cpp') diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp b/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp deleted file mode 100644 index 9dba85a9cccab..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp +++ /dev/null @@ -1,3 +0,0 @@ -int foo() { - return 12; -} diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.h b/lldb/test/API/tools/lldb-vscode/terminated-event/foo.h deleted file mode 100644 index 5d5f8f0c9e786..0000000000000 --- 
a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.h +++ /dev/null @@ -1 +0,0 @@ -int foo(); diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp b/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp deleted file mode 100644 index cd984e560e0d2..0000000000000 --- a/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include -#include "foo.h" - -int main(int argc, char const *argv[]) { - std::cout << "Hello World!" << std::endl; // main breakpoint 1 - foo(); - return 0; -} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index bd8a9148c241f..39c24f8b23e39 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -19,8 +19,6 @@ #include "lldb/API/SBBreakpoint.h" #include "lldb/API/SBBreakpointLocation.h" #include "lldb/API/SBDeclaration.h" -#include "lldb/API/SBStringList.h" -#include "lldb/API/SBStructuredData.h" #include "lldb/API/SBValue.h" #include "lldb/Host/PosixApi.h" @@ -1142,73 +1140,6 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, return reverse_request; } -// Keep all the top level items from the statistics dump, except for the -// "modules" array. 
It can be huge and cause delay -// Array and dictionary value will return as pairs -void FilterAndGetValueForKey(const lldb::SBStructuredData data, const char *key, - llvm::json::Object &out) { - lldb::SBStructuredData value = data.GetValueForKey(key); - std::string key_utf8 = llvm::json::fixUTF8(key); - if (strcmp(key, "modules") == 0) - return; - switch (value.GetType()) { - case lldb::eStructuredDataTypeFloat: - out.try_emplace(key_utf8, value.GetFloatValue()); - break; - case lldb::eStructuredDataTypeInteger: - out.try_emplace(key_utf8, value.GetIntegerValue()); - break; - case lldb::eStructuredDataTypeArray: { - lldb::SBStream contents; - value.GetAsJSON(contents); - EmplaceSafeString(out, key, contents.GetData()); - } break; - case lldb::eStructuredDataTypeBoolean: - out.try_emplace(key_utf8, value.GetBooleanValue()); - break; - case lldb::eStructuredDataTypeString: { - // Get the string size before reading - const size_t str_length = value.GetStringValue(nullptr, 0); - std::string str(str_length + 1, 0); - value.GetStringValue(&str[0], str_length); - EmplaceSafeString(out, key, str); - } break; - case lldb::eStructuredDataTypeDictionary: { - lldb::SBStream contents; - value.GetAsJSON(contents); - EmplaceSafeString(out, key, contents.GetData()); - } break; - case lldb::eStructuredDataTypeNull: - case lldb::eStructuredDataTypeGeneric: - case lldb::eStructuredDataTypeInvalid: - break; - } -} - -void addStatistic(llvm::json::Object &event) { - lldb::SBStructuredData statistics = g_vsc.target.GetStatistics(); - bool is_dictionary = - statistics.GetType() == lldb::eStructuredDataTypeDictionary; - if (!is_dictionary) - return; - llvm::json::Object stats_body; - - lldb::SBStringList keys; - if (!statistics.GetKeys(keys)) - return; - for (size_t i = 0; i < keys.GetSize(); i++) { - const char *key = keys.GetStringAtIndex(i); - FilterAndGetValueForKey(statistics, key, stats_body); - } - event.try_emplace("statistics", std::move(stats_body)); -} - -llvm::json::Object 
CreateTerminatedEventObject() { - llvm::json::Object event(CreateEventObject("terminated")); - addStatistic(event); - return event; -} - std::string JSONToString(const llvm::json::Value &json) { std::string data; llvm::raw_string_ostream os(data); diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index c812ec87beab0..bb81b88895938 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -485,12 +485,6 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request, llvm::StringRef debug_adaptor_path, llvm::StringRef comm_file); -/// Create a "Terminated" JSON object that contains statistics -/// -/// \return -/// A body JSON object with debug info and breakpoint info -llvm::json::Object CreateTerminatedEventObject(); - /// Convert a given JSON object to a string. std::string JSONToString(const llvm::json::Value &json); diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 21d2bc2229043..1c6f9c829c388 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -204,7 +204,7 @@ void SendTerminatedEvent() { g_vsc.sent_terminated_event = true; g_vsc.RunTerminateCommands(); // Send a "terminated" event - llvm::json::Object event(CreateTerminatedEventObject()); + llvm::json::Object event(CreateEventObject("terminated")); g_vsc.SendJSON(llvm::json::Value(std::move(event))); } } @@ -2949,7 +2949,7 @@ void request_variables(const llvm::json::Object &request) { const uint32_t addr_size = g_vsc.target.GetProcess().GetAddressByteSize(); lldb::SBValue reg_set = g_vsc.variables.registers.GetValueAtIndex(0); const uint32_t num_regs = reg_set.GetNumChildren(); - for (uint32_t reg_idx = 0; reg_idx < num_regs; ++reg_idx) { + for (uint32_t reg_idx=0; reg_idx Date: Tue, 18 Oct 2022 16:41:03 +0000 Subject: [PATCH 289/516] [mlir][sparse] support Parallel for/reduction. 
Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D135927 --- .../SparseTensor/Transforms/CodegenUtils.cpp | 146 +++++++++++++----- .../SparseTensor/Transforms/CodegenUtils.h | 32 +++- .../Transforms/Sparsification.cpp | 133 +++++++--------- .../Dialect/SparseTensor/sparse_parallel.mlir | 20 +-- .../SparseTensor/sparse_parallel_reduce.mlir | 63 ++++++++ .../SparseTensor/CPU/sparse_matmul.mlir | 8 + .../SparseTensor/CPU/sparse_matvec.mlir | 10 ++ 7 files changed, 285 insertions(+), 127 deletions(-) create mode 100644 mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 1e9cadd13e156..85b0dd7601157 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -219,9 +219,12 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( OpBuilder &builder, Location loc, size_t tid, size_t dim, MutableArrayRef reduc, bool isParallel, ArrayRef extraTids, ArrayRef extraDims) { + assert(dimTypes[tid].size() > dim); // We can not re-enter the same level. assert(!coord[tid][dim]); + // TODO: support multiple return on parallel for? + assert(!isParallel || reduc.empty() <= 1); Value step = constantIndex(builder, loc, 1); auto dimType = dimTypes[tid][dim]; @@ -232,11 +235,38 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( Value lo = isSparseInput ? pidxs[tid][dim] // current offset : loopSeqStack.back(); // univeral tid Value hi = highs[tid][dim]; + Operation *loop = nullptr; + Value iv; + if (isParallel) { + scf::ParallelOp parOp = + builder.create(loc, lo, hi, step, reduc); + builder.setInsertionPointToStart(parOp.getBody()); + assert(parOp.getNumReductions() == reduc.size()); + iv = parOp.getInductionVars()[0]; + + // In-place update on the reduction variable vector. 
+ // Note that the init vals is not the actual reduction variables but instead + // used as a `special handle` to (temporarily) represent them. The + // expression on init vals will be moved into scf.reduce and replaced with + // the block arguments when exiting the loop (see exitForLoop). This is + // needed as we can not build the actual reduction block and get the actual + // reduction varaible before users fill parallel loop body. + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = parOp.getInitVals()[i]; + loop = parOp; + } else { + scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); + builder.setInsertionPointToStart(forOp.getBody()); + iv = forOp.getInductionVar(); + + // In-place update on the reduction variable vector. + assert(forOp.getNumRegionIterArgs() == reduc.size()); + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = forOp.getRegionIterArg(i); + loop = forOp; + } + assert(loop && iv); - scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); - builder.setInsertionPointToStart(forOp.getBody()); - Value iv = forOp.getInductionVar(); - assert(iv); if (isSparseInput) { pidxs[tid][dim] = iv; // Generating a load on the indices array yields the coordinate. @@ -253,16 +283,12 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( // NOTE: we can also prepares for next dim here in advance // Push the loop into stack - loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), forOp, + loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), loop, coord[tid][dim]); // Emit extra locals. emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims); - // In-place update on the reduction variable vector. 
- assert(forOp.getNumRegionIterArgs() == reduc.size()); - for (int i = 0, e = reduc.size(); i < e; i++) - reduc[i] = forOp.getRegionIterArg(i); - return forOp; + return loop; } Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims( @@ -434,17 +460,73 @@ void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims( } } -SmallVector -SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc) { LoopLevelInfo &loopInfo = loopStack.back(); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; - auto forOp = llvm::cast(loopInfo.loop); - if (!reduc.empty()) { - assert(reduc.size() == forOp.getNumResults()); - builder.setInsertionPointToEnd(forOp.getBody()); - builder.create(loc, reduc); + auto forOp = llvm::dyn_cast(loopInfo.loop); + if (forOp) { + if (!reduc.empty()) { + assert(reduc.size() == forOp.getNumResults()); + rewriter.setInsertionPointToEnd(forOp.getBody()); + rewriter.create(loc, reduc); + } + // Exit the loop. + rewriter.setInsertionPointAfter(forOp); + // In-place update reduction variables. + for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++) + reduc[i] = forOp.getResult(i); + } else { + auto parOp = llvm::cast(loopInfo.loop); + if (!reduc.empty()) { + assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1); + Operation *redExp = reduc.front().getDefiningOp(); + // Reduction expression should have no use. + assert(redExp->getUses().empty()); + // This must be a binary operation. + // NOTE: This is users' responsibilty to ensure the operation are + // commutative. 
+ assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1); + + Value redVal = parOp.getInitVals().front(); + Value curVal; + if (redExp->getOperand(0) == redVal) + curVal = redExp->getOperand(1); + else if (redExp->getOperand(1) == redVal) + curVal = redExp->getOperand(0); + // One of the operands must be the init value (which is also the + // previous reduction value). + assert(curVal); + // The reduction expression should be the only user of the reduction val + // inside the parallel for. + unsigned numUsers = 0; + for (Operation *op : redVal.getUsers()) { + if (op->getParentOp() == parOp) + numUsers++; + } + assert(numUsers == 1); + (void)numUsers; // to silence unused variable warning in release build + + rewriter.setInsertionPointAfter(redExp); + auto redOp = rewriter.create(loc, curVal); + // Attach to the reduction op. + Block *redBlock = &redOp.getRegion().getBlocks().front(); + rewriter.setInsertionPointToEnd(redBlock); + Operation *newRed = rewriter.clone(*redExp); + // Replaces arguments of the reduction expression by using the block + // arguments from scf.reduce. + rewriter.updateRootInPlace( + newRed, [&]() { newRed->setOperands(redBlock->getArguments()); }); + // Erases the out-dated reduction expression. + rewriter.eraseOp(redExp); + rewriter.setInsertionPointToEnd(redBlock); + rewriter.create(loc, newRed->getResult(0)); + } + rewriter.setInsertionPointAfter(parOp); + // In-place update reduction variables. 
+ for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++) + reduc[i] = parOp.getResult(i); } // Finished iterating a tensor, clean up @@ -458,14 +540,10 @@ SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc, if (!isDenseDLT(dimTypes[tid][dim])) highs[tid][dim] = Value(); } - // exit the loop - builder.setInsertionPointAfter(forOp); - return forOp.getResults(); } -SmallVector -SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitCoIterationLoop( + OpBuilder &builder, Location loc, MutableArrayRef reduc) { auto whileOp = llvm::cast(loopStack.back().loop); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; @@ -499,10 +577,10 @@ SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, } // Reduction value from users. - SmallVector ret; - for (auto red : reduc) { - operands.push_back(red); - ret.push_back(whileOp->getResult(o++)); + for (unsigned i = 0, e = reduc.size(); i < e; i++) { + operands.push_back(reduc[i]); + // In place update reduction variable. + reduc[i] = whileOp->getResult(o++); } // An (optional) universal index. @@ -517,26 +595,24 @@ SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, assert(o == operands.size()); builder.create(loc, operands); builder.setInsertionPointAfter(whileOp); - return ret; } -SmallVector -SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitCurrentLoop(RewriterBase &rewriter, + Location loc, + MutableArrayRef reduc) { // Clean up the values, it would help use to discover potential bug at a // earlier stage (instead of silently using a wrong value). 
LoopLevelInfo &loopInfo = loopStack.back(); assert(loopInfo.tids.size() == loopInfo.dims.size()); SmallVector red; if (llvm::isa(loopInfo.loop)) { - red = exitCoiterationLoop(builder, loc, reduc); + exitCoIterationLoop(rewriter, loc, reduc); } else { - red = exitForLoop(builder, loc, reduc); + exitForLoop(rewriter, loc, reduc); } assert(loopStack.size() == loopSeqStack.size()); loopStack.pop_back(); - return red; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 3228eb4c79cb2..a75d3920a4d55 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -380,8 +380,8 @@ class SparseTensorLoopEmitter { ArrayRef dims, bool needsUniv, MutableArrayRef reduc = {}, ArrayRef extraTids = {}, ArrayRef extraDims = {}); - SmallVector exitCurrentLoop(OpBuilder &builder, Location loc, - ArrayRef reduc = {}); + void exitCurrentLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc = {}); /// Returns the array of coordinate for all the loop generated till now. void getCoordinateArray(SmallVectorImpl &coords) const { @@ -452,17 +452,35 @@ class SparseTensorLoopEmitter { ArrayRef dims); /// Exits a for loop, returns the reduction results, e.g., + /// For sequential for loops: /// %ret = for () { /// ... + /// %val = addi %args, %c /// yield %val /// } - /// Return %ret to user, while %val is provided by users (`reduc`) - SmallVector exitForLoop(OpBuilder &builder, Location loc, - ArrayRef reduc); + /// For parallel loops, the following generated code by users: + /// %ret = parallel () init(%args) { + /// ... + /// %val = op %args, %c + /// } + /// will be transformed into + /// %ret = parallel () init(%args) { + /// ... 
+ /// scf.reduce(%c) bb0(%0, %1){ + /// %val = op %0, %1 + /// scf.reduce.return %val + /// } + /// } + /// NOTE: only one instruction will be moved into reduce block, transformation + /// will fail if multiple instructions are used to compute the reduction + /// value. + /// Return %ret to user, while %val is provided by users (`reduc`). + void exitForLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc); /// Exits a while loop, returns the reduction results. - SmallVector exitCoiterationLoop(OpBuilder &builder, Location loc, - ArrayRef reduc); + void exitCoIterationLoop(OpBuilder &builder, Location loc, + MutableArrayRef reduc); // Whether the loop emitter needs to treat the last tensor as the output // tensor. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 82125e34d5dff..11e9a649984cf 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -410,6 +410,34 @@ static Value getCustomRedId(Operation *op) { // Sparse compiler synthesis methods (statements and expressions). //===----------------------------------------------------------------------===// +/// Generates loop boundary statements (entering/exiting loops). The function +/// passes and updates the reduction value. +static Optional genLoopBoundary( + CodeGen &codegen, Merger &merger, + function_ref(MutableArrayRef reduc)> + callback) { + SmallVector reduc; + if (codegen.redVal) + reduc.push_back(codegen.redVal); + if (codegen.expValues) + reduc.push_back(codegen.expCount); + if (codegen.insChain) + reduc.push_back(codegen.insChain); + + auto r = callback(reduc); + + // Callback should do in-place update on reduction value vector. 
+ unsigned i = 0; + if (codegen.redVal) + updateReduc(merger, codegen, reduc[i++]); + if (codegen.expValues) + codegen.expCount = reduc[i++]; + if (codegen.insChain) + codegen.insChain = reduc[i]; + + return r; +} + /// Local bufferization of all dense and sparse data structures. static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder, linalg::GenericOp op) { @@ -869,23 +897,25 @@ static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder, /// Returns parallelization strategy. Any implicit loop in the Linalg /// operation that is marked "parallel" is a candidate. Whether it is actually /// converted to a parallel operation depends on the requested strategy. -static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction, - bool isSparse) { +static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isSparse) { // Reject parallelization of sparse output. if (codegen.sparseOut) return false; + // Parallel loops on tensor expansion can cause data races. + if (codegen.expCount) + return false; // Inspect strategy. 
switch (codegen.options.parallelizationStrategy) { case SparseParallelizationStrategy::kNone: return false; case SparseParallelizationStrategy::kDenseOuterLoop: - return isOuter && !isSparse && !isReduction; + return isOuter && !isSparse; case SparseParallelizationStrategy::kAnyStorageOuterLoop: - return isOuter && !isReduction; + return isOuter; case SparseParallelizationStrategy::kDenseAnyLoop: - return !isSparse && !isReduction; + return !isSparse; case SparseParallelizationStrategy::kAnyStorageAnyLoop: - return !isReduction; + return true; } llvm_unreachable("unexpected parallelization strategy"); } @@ -898,33 +928,16 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef extraDims) { Location loc = op.getLoc(); auto iteratorTypes = op.getIteratorTypesArray(); - bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]); bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) || isSingletonDLT(merger.getDimLevelType(tid, idx)); - bool isParallel = isParallelFor(codegen, isOuter, isReduction, isSparse); - assert(!isParallel); - - // Emit a sequential for loop. 
- SmallVector operands; - if (codegen.redVal) - operands.push_back(codegen.redVal); - if (codegen.expValues) - operands.push_back(codegen.expCount); - if (codegen.insChain) - operands.push_back(codegen.insChain); - - Operation *loop = codegen.loopEmitter.enterLoopOverTensorAtDim( - builder, loc, tid, dim, operands, isParallel, extraTids, extraDims); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, operands[o++]); - if (codegen.expValues) - codegen.expCount = operands[o++]; - if (codegen.insChain) - codegen.insChain = operands[o++]; - assert(o == operands.size()); - + bool isParallel = isParallelFor(codegen, isOuter, isSparse); + + Operation *loop = + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + return codegen.loopEmitter.enterLoopOverTensorAtDim( + builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims); + }).value(); + assert(loop); return loop; } @@ -934,29 +947,15 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef condTids, ArrayRef condDims, ArrayRef extraTids, ArrayRef extraDims) { - SmallVector operands; - - // Construct the while-loop with a parameter for each index. - if (codegen.redVal) - operands.push_back(codegen.redVal); - if (codegen.expValues) - operands.push_back(codegen.expCount); - if (codegen.insChain) - operands.push_back(codegen.insChain); - - Operation *loop = codegen.loopEmitter.enterCoIterationOverTensorsAtDims( - builder, op.getLoc(), condTids, condDims, needsUniv, operands, extraTids, - extraDims); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, operands[o++]); - if (codegen.expValues) - codegen.expCount = operands[o++]; - if (codegen.insChain) - codegen.insChain = operands[o++]; - assert(o == operands.size()); + Operation *loop = + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + // Construct the while-loop with a parameter for each index. 
+ return codegen.loopEmitter.enterCoIterationOverTensorsAtDims( + builder, op.getLoc(), condTids, condDims, needsUniv, reduc, + extraTids, extraDims); + }).value(); + assert(loop); return loop; } @@ -1186,37 +1185,21 @@ static Operation *startLoop(Merger &merger, CodeGen &codegen, } /// Ends a single loop in current sequence. Returns new values for needsUniv. -static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder, +static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter, linalg::GenericOp op, Operation *loop, unsigned idx, unsigned li, bool needsUniv) { // End a while-loop. if (auto whileOp = dyn_cast(loop)) { - finalizeWhileOp(merger, codegen, builder, op, idx, needsUniv, + finalizeWhileOp(merger, codegen, rewriter, op, idx, needsUniv, merger.lat(li).bits, whileOp); } else { needsUniv = false; } - SmallVector reduc; - if (codegen.redVal) - reduc.push_back(codegen.redVal); - if (codegen.expValues) - reduc.push_back(codegen.expCount); - if (codegen.insChain) - reduc.push_back(codegen.insChain); - - auto loopRet = - codegen.loopEmitter.exitCurrentLoop(builder, op.getLoc(), reduc); - assert(reduc.size() == loopRet.size()); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, loopRet[o++]); - if (codegen.expValues) - codegen.expCount = loopRet[o++]; - if (codegen.insChain) - codegen.insChain = loopRet[o++]; - assert(o == loopRet.size()); + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + codegen.loopEmitter.exitCurrentLoop(rewriter, op.getLoc(), reduc); + return llvm::None; + }); return needsUniv; } diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir index 38766b08ccab8..f38865c5e2a4f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -1,14 +1,13 @@ // RUN: mlir-opt %s -sparsification="parallelization-strategy=none" | \ // RUN: FileCheck %s 
--check-prefix=CHECK-PAR0 -// FIXME: we do not support vectorization/parallel loops in loop emitter right now -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR1 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR2 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR3 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR4 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR1 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR2 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR3 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR4 #DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] @@ -151,7 +150,8 @@ func.func @scale_ss(%scale: f32, // // CHECK-PAR4-LABEL: func @matvec // CHECK-PAR4: scf.parallel -// CHECK-PAR4: scf.for +// CHECK-PAR4: scf.parallel +// CHECK-PAR4: scf.reduce // CHECK-PAR4: return // func.func @matvec(%arga: tensor<16x32xf32, #CSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir new file mode 100644 index 0000000000000..8ba66d2c92ae1 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -0,0 +1,63 @@ +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ +// RUN: FileCheck %s + +#CSR = #sparse_tensor.encoding<{ + 
dimLevelType = [ "dense", "compressed" ] +}> + +#trait_matvec = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A + affine_map<(i,j) -> (j)>, // b + affine_map<(i,j) -> (i)> // x (out) + ], + iterator_types = ["parallel", "reduction"], + doc = "x(i) += A(i,j) * b(j)" +} +// CHECK-LABEL: func.func @matvec( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>, +// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>, +// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> { +// CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK: %[[TMP_0:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index} +// CHECK: %[[TMP_1:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index} +// CHECK: %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]] +// CHECK: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : memref<32xf32> +// CHECK: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : memref<16xf32> +// CHECK: scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) { +// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> +// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref +// CHECK: %[[TMP_8:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index +// CHECK: %[[TMP_9:.*]] = memref.load %[[TMP_0]][%[[TMP_8]]] : memref +// CHECK: %[[TMP_10:.*]] = scf.parallel (%[[TMP_arg4:.*]]) = (%[[TMP_7]]) to (%[[TMP_9]]) step (%[[TMP_c1]]) init (%[[TMP_6]]) -> f32 { +// CHECK: %[[TMP_11:.*]] = memref.load %[[TMP_1]][%[[TMP_arg4]]] : memref +// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_2]][%[[TMP_arg4]]] : memref +// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_3]][%[[TMP_11]]] : memref<32xf32> +// CHECK: %[[TMP_14:.*]] = arith.mulf %[[TMP_12]], %[[TMP_13]] : f32 +// CHECK: 
scf.reduce(%[[TMP_14]]) : f32 { +// CHECK: ^bb0(%[[TMP_arg5:.*]]: f32, %[[TMP_arg6:.*]]: f32): +// CHECK: %[[TMP_15:.*]] = arith.addf %[[TMP_arg5]], %[[TMP_arg6]] : f32 +// CHECK: scf.reduce.return %[[TMP_15]] : f32 +// CHECK: } +// CHECK: scf.yield +// CHECK: } +// CHECK: memref.store %[[TMP_10]], %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> +// CHECK: scf.yield +// CHECK: } +// CHECK: %[[TMP_5:.*]] = bufferization.to_tensor %[[TMP_4]] : memref<16xf32> +// CHECK: return %[[TMP_5]] : tensor<16xf32> +func.func @matvec(%arga: tensor<16x32xf32, #CSR>, + %argb: tensor<32xf32>, + %argx: tensor<16xf32>) -> tensor<16xf32> { + %0 = linalg.generic #trait_matvec + ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) + outs(%argx: tensor<16xf32>) { + ^bb(%A: f32, %b: f32, %x: f32): + %0 = arith.mulf %A, %b : f32 + %1 = arith.addf %0, %x : f32 + linalg.yield %1 : f32 + } -> tensor<16xf32> + return %0 : tensor<16xf32> +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir index c12d2b9b913e4..459b0e13667f6 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir @@ -2,6 +2,14 @@ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// +// Do the same run, but now with parallelization. 
+// +// RUN: mlir-opt %s --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir index 59e7f33c22c88..adc0b261f04d3 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -4,6 +4,16 @@ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// +// Do the same run, but now with parallelization. +// +// RUN: mlir-opt %s \ +// RUN: --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ +// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s !Filename = !llvm.ptr From a38db7bfc630e19902316465e8b6478793f6de75 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 4 Nov 2022 15:49:15 -0700 Subject: [PATCH 290/516] AMDGPU: Fix test failure --- ...cannot-create-empty-or-backward-segment.ll | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll index 95d7cbb82fb77..8c32b6c8f1b0b 100644 --- a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll +++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll @@ -31,9 +31,9 @@ define amdgpu_kernel void 
@cannot_create_empty_or_backwards_segment(i1 %arg, i1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_branch .LBB0_3 ; CHECK-NEXT: .LBB0_1: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_mov_b64 s[18:19], -1 -; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[18:19], 0 ; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_mov_b64 s[16:17], -1 ; CHECK-NEXT: s_mov_b64 s[22:23], -1 ; CHECK-NEXT: .LBB0_2: ; %Flow7 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 @@ -49,19 +49,19 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1 ; CHECK-NEXT: s_cbranch_vccz .LBB0_6 ; CHECK-NEXT: ; %bb.5: ; %bb9 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_mov_b64 s[18:19], 0 -; CHECK-NEXT: s_mov_b64 s[16:17], -1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[18:19], -1 ; CHECK-NEXT: s_mov_b64 s[22:23], s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB0_7 ; CHECK-NEXT: s_branch .LBB0_8 ; CHECK-NEXT: .LBB0_6: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_mov_b64 s[18:19], -1 -; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[16:17], -1 +; CHECK-NEXT: s_mov_b64 s[18:19], 0 ; CHECK-NEXT: s_mov_b64 s[22:23], 0 ; CHECK-NEXT: .LBB0_7: ; %bb10 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_mov_b64 s[18:19], 0 -; CHECK-NEXT: s_mov_b64 s[16:17], -1 +; CHECK-NEXT: s_mov_b64 s[18:19], -1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 ; CHECK-NEXT: s_mov_b64 s[22:23], s[12:13] ; CHECK-NEXT: .LBB0_8: ; %Flow9 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 @@ -108,10 +108,10 @@ define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1 ; CHECK-NEXT: s_andn2_b64 vcc, exec, s[20:21] ; CHECK-NEXT: s_cbranch_vccnz .LBB0_3 ; CHECK-NEXT: ; %bb.17: ; %loop.exit.guard -; CHECK-NEXT: s_and_b64 vcc, exec, s[18:19] +; CHECK-NEXT: s_and_b64 vcc, exec, s[16:17] ; CHECK-NEXT: s_cbranch_vccnz .LBB0_22 ; CHECK-NEXT: ; %bb.18: ; %loop.exit.guard5 -; CHECK-NEXT: s_and_b64 vcc, exec, s[16:17] +; CHECK-NEXT: 
s_and_b64 vcc, exec, s[18:19] ; CHECK-NEXT: s_cbranch_vccnz .LBB0_22 ; CHECK-NEXT: ; %bb.19: ; %bb17 ; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7] From ada6aa3f5c9693130747549f99b6bb27cff58f2f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 16 Oct 2022 21:35:07 -0700 Subject: [PATCH 291/516] AMDGPU: Fold undef rcp to qnan This matches the behavior in instcombine, and for fdiv. --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 +++++-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll | 4 +++- llvm/test/CodeGen/AMDGPU/select-undef.ll | 8 +++----- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 347b0ee9d3b46..64ebf1d2d8b60 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9991,8 +9991,11 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N, EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); - if (N0.isUndef()) - return N0; + if (N0.isUndef()) { + return DCI.DAG.getConstantFP( + APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)), SDLoc(N), + VT); + } if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP || N0.getOpcode() == ISD::SINT_TO_FP)) { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll index db6ee7bd0aeb6..929f935f69108 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll @@ -7,7 +7,9 @@ declare double @llvm.sqrt.f64(double) #0 declare float @llvm.sqrt.f32(float) #0 ; FUNC-LABEL: {{^}}rcp_undef_f32: -; SI-NOT: v_rcp_f32 +; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000 +; SI-NOT: [[NAN]] +; SI: buffer_store_dword [[NAN]] define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 { %rcp = call float @llvm.amdgcn.rcp.f32(float undef) store float %rcp, float addrspace(1)* %out, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/select-undef.ll 
b/llvm/test/CodeGen/AMDGPU/select-undef.ll index f02cd3fc5e4e6..81deec1e0dbb8 100644 --- a/llvm/test/CodeGen/AMDGPU/select-undef.ll +++ b/llvm/test/CodeGen/AMDGPU/select-undef.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalar-ir-passes=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}select_undef_lhs: ; GCN: s_waitcnt @@ -6,8 +6,7 @@ ; GCN-NOT: v_cndmask ; GCN-NEXT: s_setpc_b64 define float @select_undef_lhs(float %val, i1 %cond) { - %undef = call float @llvm.amdgcn.rcp.f32(float undef) - %sel = select i1 %cond, float %undef, float %val + %sel = select i1 %cond, float undef, float %val ret float %sel } @@ -17,8 +16,7 @@ define float @select_undef_lhs(float %val, i1 %cond) { ; GCN-NOT: v_cndmask ; GCN-NEXT: s_setpc_b64 define float @select_undef_rhs(float %val, i1 %cond) { - %undef = call float @llvm.amdgcn.rcp.f32(float undef) - %sel = select i1 %cond, float %val, float %undef + %sel = select i1 %cond, float %val, float undef ret float %sel } From 292533324cadf0164a7e1d532508cb59775e0a72 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Fri, 4 Nov 2022 15:51:44 -0700 Subject: [PATCH 292/516] [libc++abi] Use std::nullptr_t instead of declaring it manually Sometimes libc++'s stddef.h wrapper gets included, which defines ::nullptr_t. This test is compiled with -Wshadow -Werror, so shadowing ::nullptr_t with a nullptr_t in main is an error. Include cstddef, which is guaranteed to define std::nullptr_t in C++11 and forward. 
Reviewed By: ldionne, #libc_abi Differential Revision: https://reviews.llvm.org/D137127 --- libcxxabi/test/catch_reference_nullptr.pass.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libcxxabi/test/catch_reference_nullptr.pass.cpp b/libcxxabi/test/catch_reference_nullptr.pass.cpp index 708d5d798a1d1..e9c3ba31b06b7 100644 --- a/libcxxabi/test/catch_reference_nullptr.pass.cpp +++ b/libcxxabi/test/catch_reference_nullptr.pass.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03, +// UNSUPPORTED: c++03 // UNSUPPORTED: no-exceptions #include +#include #include +#include struct A {}; @@ -27,13 +29,13 @@ static void catch_nullptr_test() { int main(int, char**) { - using nullptr_t = decltype(nullptr); + static_assert(std::is_same::value, ""); // A reference to nullptr_t can catch nullptr. - catch_nullptr_test(); - catch_nullptr_test(); - catch_nullptr_test(); - catch_nullptr_test(); + catch_nullptr_test(); + catch_nullptr_test(); + catch_nullptr_test(); + catch_nullptr_test(); // No other reference type can. #if 0 From 5617fb1411f765667c016b5b75daa9d1110c36af Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 4 Nov 2022 16:05:10 -0700 Subject: [PATCH 293/516] [MLGO][NFC] Use std::map instead of DenseMap to avoid use after free In `MLInlineAdvisor::getAdviceImpl`, we call `getCachedFPI` twice, once for the caller, once for the callee, so the second may invalidate the reference obtained by the first because the underlying implementation of the cache is a `DenseMap`. `std::map` doesn't have that problem. 
--- llvm/include/llvm/Analysis/MLInlineAdvisor.h | 2 +- llvm/lib/Analysis/MLInlineAdvisor.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index 00e8d7d7dd4de..3db948d365c77 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -69,7 +69,7 @@ class MLInlineAdvisor : public InlineAdvisor { getSkipAdviceIfUnreachableCallsite(CallBase &CB); void print(raw_ostream &OS) const override; - mutable DenseMap FPICache; + mutable std::map FPICache; LazyCallGraph &CG; diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index f55de71ea98ae..a20c05243b773 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -415,8 +415,8 @@ void MLInlineAdvisor::print(raw_ostream &OS) const { << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n"; OS << "[MLInlineAdvisor] FPI:\n"; for (auto I : FPICache) { - OS << I.getFirst()->getName() << ":\n"; - I.getSecond().print(OS); + OS << I.first->getName() << ":\n"; + I.second.print(OS); OS << "\n"; } OS << "\n"; From 026ddced176e66657e51ffb73b26019b45485db0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 3 Nov 2022 14:05:37 -0700 Subject: [PATCH 294/516] R600: Remove broken atomicrmw patterns inc/dec are not add/sub of 1. --- .../Target/AMDGPU/EvergreenInstructions.td | 16 ------------- .../CodeGen/AMDGPU/r600.global_atomics.ll | 24 +++++++++---------- 2 files changed, 12 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index a9a3421e81924..3d4f8d52fdc69 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -310,14 +310,6 @@ multiclass AtomicPat; } -multiclass AtomicIncDecPat { - // FIXME: Add _RTN version. 
We need per WI scratch location to store the old value - // EXTRACT_SUBREG here is dummy, we know the node has no uses - def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, C)), - (EXTRACT_SUBREG (inst_noret - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>; -} // CMPSWAP is pattern is special // EXTRACT_SUBREG here is dummy, we know the node has no uses @@ -349,14 +341,6 @@ defm AtomicOrPat : AtomicPat ; defm AtomicXorPat : AtomicPat ; -defm AtomicIncAddPat : AtomicIncDecPat ; -defm AtomicIncSubPat : AtomicIncDecPat ; -defm AtomicDecAddPat : AtomicIncDecPat ; -defm AtomicDecSubPat : AtomicIncDecPat ; // Should be predicated on FeatureFP64 // def FMA_64 : R600_3OP < diff --git a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll index 1ddc41feb0069..3d2f1b4fb9f4f 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll @@ -505,36 +505,36 @@ entry: ret void } -; FUNC-LABEL: {{^}}atomic_inc_add -; EG: MEM_RAT ATOMIC_INC_UINT -define amdgpu_kernel void @atomic_inc_add(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_add_1 +; EG: MEM_RAT ATOMIC_ADD +define amdgpu_kernel void @atomic_add_1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 1 seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_dec_add -; EG: MEM_RAT ATOMIC_DEC_UINT -define amdgpu_kernel void @atomic_dec_add(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_add_neg1 +; EG: MEM_RAT ATOMIC_ADD +define amdgpu_kernel void @atomic_add_neg1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 -1 seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_inc_sub -; EG: MEM_RAT ATOMIC_INC_UINT -define amdgpu_kernel void @atomic_inc_sub(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_sub_neg1 +; EG: MEM_RAT 
ATOMIC_SUB +define amdgpu_kernel void @atomic_sub_neg1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 -1 seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_dec_sub -; EG: MEM_RAT ATOMIC_DEC_UINT -define amdgpu_kernel void @atomic_dec_sub(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_sub_1 +; EG: MEM_RAT ATOMIC_SUB +define amdgpu_kernel void @atomic_sub_1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst From 53d5d3401120f2aa741a73a5a9ba0ce012ca532c Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Fri, 4 Nov 2022 17:52:21 +0000 Subject: [PATCH 295/516] [mlir][sparse] extend foreach operation to accept reduction arguments. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D137463 --- .../SparseTensor/IR/SparseTensorOps.td | 66 ++++++++++++------- .../SparseTensor/IR/SparseTensorDialect.cpp | 36 ++++++++-- .../Transforms/SparseTensorRewriting.cpp | 59 ++++++++++------- mlir/test/Dialect/SparseTensor/invalid.mlir | 45 +++++++++++++ mlir/test/Dialect/SparseTensor/roundtrip.mlir | 20 ++++++ 5 files changed, 172 insertions(+), 54 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 8b8dc46297971..a22dcce4298ef 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -857,21 +857,44 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>, def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", [SingleBlockImplicitTerminator<"YieldOp">]>, - Arguments<(ins AnyTensor:$tensor)>{ + Arguments<(ins AnyTensor:$tensor, + Variadic:$initArgs)>, + Results<(outs Variadic:$results)> { let summary = "Iterates over elements in a tensor"; let description = [{ 
Iterates over stored elements in a tensor (which are typically, but not always, non-zero for sparse tensors) and executes the block. - For an input tensor with rank n, the block must take n + 1 arguments. The - first n arguments must be Index type, together indicating the current coordinates - of the element being visited. The last argument must have the same type as the + For an input tensor with rank n, the block must take n + 1 arguments (plus one + argument per loop-carried variable, as described below). The first n arguments must be + Index type, together indicating the current coordinates of the element being visited. + The (n + 1)-th argument must have the same type as the tensor's element type, representing the actual value loaded from the input tensor at the given coordinates. - Note that foreach generated loop iterates over the stored elements in the storage - order. However, no matter what storage order is used, the indices passed to the block - always obey the original dimension order. + `sparse_tensor.foreach` can also operate on loop-carried variables and returns + the final values after loop termination. The initial values of the variables are + passed as additional SSA operands to the "sparse_tensor.foreach"; the corresponding block + arguments follow the n + 1 SSA values mentioned above (n coordinates and 1 value). + + The region must terminate with a "sparse_tensor.yield" that passes the current + values of all loop-carried variables to the next iteration, or to the + result, if at the last iteration. The number and static types of loop-carried + variables may not change with iterations. + + For example: + ```mlir + %c0 = arith.constant 0 : i32 + %ret = sparse_tensor.foreach in %0 init(%c0): tensor, i32 -> i32 do { + ^bb0(%arg1: index, %arg2: index, %arg3: i32, %iter: i32): + %sum = arith.addi %iter, %arg3 : i32 + sparse_tensor.yield %sum : i32 + } + ``` + + It is important to note that the loop generated by foreach iterates over the stored elements + in the storage order.
However, no matter what storage order is used, the indices passed + to the block always obey the original dimension order. For example: ```mlir @@ -879,10 +902,10 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", dimLevelType = [ "compressed", "compressed" ], dimOrdering = affine_map<(i,j) -> (j,i)> }> - + // foreach on a column-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #COL_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1] } @@ -892,30 +915,25 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", // foreach on a row-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #ROW_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1] } ``` - - Example: - - ```mlir - sparse_tensor.foreach in %0 : tensor do { - ^bb0(%arg1: index, %arg2: index, %arg3: f64): - do something... - } - ``` }]; let builders = [ - OpBuilder<( - ins "Value":$tensor, - "function_ref")> + OpBuilder<(ins "Value":$tensor, + "function_ref")>, + OpBuilder<(ins "Value":$tensor, + "ValueRange":$iterArgs, + "function_ref")> ]; - let regions = (region AnyRegion:$region); - let assemblyFormat = "`in` $tensor attr-dict `:` type($tensor) `do` $region"; + let regions = (region SizedRegion<1>:$region); + let assemblyFormat = "`in` $tensor (`init``(`$initArgs^`)`)? attr-dict" + " `:` type($tensor) (`,` type($initArgs)^)?" + " (`->` type($results)^)? 
`do` $region"; let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 133879b12b197..4563a054ec160 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -581,11 +581,20 @@ LogicalResult CompressOp::verify() { void ForeachOp::build( OpBuilder &builder, OperationState &result, Value tensor, - function_ref bodyBuilder) { - build(builder, result, tensor); + function_ref + bodyBuilder) { + build(builder, result, tensor, llvm::None, bodyBuilder); +} + +void ForeachOp::build( + OpBuilder &builder, OperationState &result, Value tensor, + ValueRange initArgs, + function_ref + bodyBuilder) { + build(builder, result, initArgs.getTypes(), tensor, initArgs); + // Builds foreach body. if (!bodyBuilder) return; - auto rtp = tensor.getType().cast(); int64_t rank = rtp.getRank(); @@ -602,23 +611,38 @@ void ForeachOp::build( auto ®ion = *result.regions.front(); Block *bodyBlock = builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs); - bodyBuilder(builder, result.location, bodyBlock->getArguments()); + bodyBuilder(builder, result.location, + bodyBlock->getArguments().slice(0, rank), + bodyBlock->getArguments()[rank], + bodyBlock->getArguments().drop_front(rank + 1)); } LogicalResult ForeachOp::verify() { auto t = getTensor().getType().cast(); auto args = getBody()->getArguments(); - if (static_cast(t.getRank()) + 1 != args.size()) + if (static_cast(t.getRank()) + 1 + getInitArgs().size() != + args.size()) return emitError("Unmatched number of arguments in the block"); + if (getNumResults() != getInitArgs().size()) + return emitError("Mismatch in number of init arguments and results"); + + if (getResultTypes() != getInitArgs().getTypes()) + return emitError("Mismatch in types of init arguments and results"); + + auto yield = cast(getBody()->getTerminator()); + if (yield.getNumOperands() 
!= getNumResults() || + yield.getOperands().getTypes() != getResultTypes()) + return emitError("Mismatch in types of yield values and results"); + for (int64_t i = 0, e = t.getRank(); i < e; i++) if (args[i].getType() != IndexType::get(getContext())) emitError( llvm::formatv("Expecting Index type for argument at index {0}", i)); auto elemTp = t.getElementType(); - auto valueTp = args.back().getType(); + auto valueTp = args[t.getRank()].getType(); if (elemTp != valueTp) emitError(llvm::formatv("Unmatched element type between input tensor and " "block argument, expected:{0}, got: {1}", diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 9c002f1ae0ec8..7747fd73aa9bb 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -357,7 +357,9 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { auto cooBuffer = rewriter.create(loc, cooTp, dstDynSizes).getResult(); rewriter.create( - loc, srcTensor, [&](OpBuilder &builder, Location loc, ValueRange args) { + loc, srcTensor, llvm::None, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector srcIndices; SmallVector dstIndices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { @@ -366,7 +368,7 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { } translateIndicesArray(builder, loc, op.getReassociationIndices(), srcIndices, srcSizes, dstSizes, dstIndices); - builder.create(loc, args.back(), cooBuffer, dstIndices); + builder.create(loc, v, cooBuffer, dstIndices); builder.create(loc); }); @@ -446,7 +448,9 @@ struct ConcatenateRewriter : public OpRewritePattern { // Build a for op for each input tensor to append new values into the // output tensor. 
rewriter.create( - loc, input, [&](OpBuilder &builder, Location loc, ValueRange args) { + loc, input, llvm::None, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0; i < rank; i++) { uint64_t dim = @@ -457,7 +461,7 @@ struct ConcatenateRewriter : public OpRewritePattern { idx = builder.create(loc, idx, offset); indices.push_back(idx); } - builder.create(loc, args.back(), cooBuffer, indices); + builder.create(loc, v, cooBuffer, indices); builder.create(loc); }); // Accumulates the offset. Note that only static-shaped inputs are allowed @@ -558,12 +562,13 @@ struct ConvertRewriter : public OpRewritePattern { sizesForTensor(rewriter, sizes, loc, srcTp, src); Value dst = allocDenseTensor(rewriter, loc, dstTp, sizes); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { - builder.create(loc, args.back(), dst, - args.drop_back()); - builder.create(loc); - }); + rewriter.create(loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, + ValueRange args, Value v, ValueRange reduc) { + builder.create(loc, v, dst, + args); + builder.create(loc); + }); rewriter.replaceOpWithNewOp(op, dstTp, dst); return success(); @@ -598,13 +603,15 @@ struct ConvertRewriter : public OpRewritePattern { tmpCoo = rewriter.create(loc, srcTp, dynSrcSizes).getResult(); rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { uint64_t dim = toStoredDim(encSrc, i); indices.push_back(args[dim]); } - builder.create(loc, args.back(), tmpCoo, indices); + builder.create(loc, v, tmpCoo, indices); builder.create(loc); }); src = tmpCoo; @@ -646,16 +653,18 @@ struct ConvertRewriter : public OpRewritePattern { getDynamicSizes(dstTp, srcSizes, dynDstSizes); Value dst = 
rewriter.create(loc, dstTp, dynDstSizes).getResult(); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { - SmallVector indices; - for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { - uint64_t dim = toStoredDim(encDst, i); - indices.push_back(args[dim]); - } - builder.create(loc, args.back(), dst, indices); - builder.create(loc); - }); + rewriter.create(loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, + ValueRange args, Value v, ValueRange reduc) { + SmallVector indices; + for (int64_t i = 0, e = srcTp.getRank(); i < e; + i++) { + uint64_t dim = toStoredDim(encDst, i); + indices.push_back(args[dim]); + } + builder.create(loc, v, dst, indices); + builder.create(loc); + }); // Release the temporary COO if it is created. if (tmpCoo) @@ -866,12 +875,14 @@ struct OutRewriter : public OpRewritePattern { ModuleOp module = op->getParentOfType(); // For each element in the source tensor, output the element. rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { for (uint64_t i = 0; i < rank; i++) { rewriter.create(loc, args[i], indices, constantIndex(builder, loc, i)); } - rewriter.create(loc, args.back(), value); + rewriter.create(loc, v, value); SmallVector operands{writer, rankValue, indices, value}; FlatSymbolRefAttr fn = getFunc(module, outNextFuncName, {}, operands, EmitCInterface::On); diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 1ab4a66665287..407f19401b86b 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -551,6 +551,51 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { // ----- +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () 
{ + // expected-error@+1 {{Unmatched element type between input tensor and block argument}} + sparse_tensor.foreach in %arg0 : tensor<2x4xf64, #DCSR> do { + ^bb0(%1: index, %2: index, %v: f32) : + } + return +} + +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in number of init arguments and results}} + sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 do { + ^bb0(%1: index, %2: index, %v: f32, %r1 : i32) : + } + return +} + +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in types of init arguments and results}} + %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> i32 do { + ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : + } + return +} + +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in types of yield values and results}} + %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> f32 do { + ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : + sparse_tensor.yield %1 : index + } + return +} + +// ----- + // TODO: a test case with empty xs doesn't work due to some parser issues. 
func.func @sparse_sort_x_type( %arg0: index, %arg1: memref) { diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index e19a5ee833f83..628ce3b4535a5 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -411,6 +411,26 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { return } +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> + +// CHECK-LABEL: func @sparse_tensor_foreach( +// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[A1:.*]]: f32 +// CHECK-NEXT: %[[RET:.*]] = sparse_tensor.foreach in %[[A0]] init(%[[A1]]) +// CHECK-NEXT: ^bb0(%[[TMP_1:.*]]: index, %[[TMP_2:.*]]: index, %[[TMP_v:.*]]: f64, %[[TMP_r:.*]]: f32) +// CHECK: sparse_tensor.yield %[[TMP_r]] : f32 +// CHECK: } +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + %ret = sparse_tensor.foreach in %arg0 init(%arg1): tensor<2x4xf64, #DCSR>, f32 -> f32 + do { + ^bb0(%1: index, %2: index, %v: f64, %r: f32) : + sparse_tensor.yield %r : f32 + } + return +} + // ---- // CHECK-LABEL: func @sparse_sort_1d0v( From 7ded25c0cb298f474ffc1d9691a7d7c57c08c9f6 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 4 Nov 2022 23:35:05 +0000 Subject: [PATCH 296/516] Fix Bazel build for ba65584d1545951ce958ea5729692374055d6b9f --- utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 2c819c3181e98..0cc19721567b0 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -67,6 +67,7 @@ cc_library( srcs = glob( [ "lib/Analysis/*.cpp", + "lib/Analysis/*.h", "lib/Analysis/DataFlow/*.cpp", ], ), From 
70508b614e6478ba2c3fc79e935e2c68e2d79b71 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Fri, 4 Nov 2022 19:35:29 +0000 Subject: [PATCH 297/516] [mlir][sparse] fix sparse tensor rewriting patterns that do not propagate sparse tensor SSA properly. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D137468 --- .../SparseTensor/IR/SparseTensorDialect.cpp | 5 +- .../SparseTensor/Transforms/CodegenUtils.cpp | 3 + .../Transforms/SparseTensorRewriting.cpp | 119 +++++++++++------- .../SparseTensor/convert_dense2sparse.mlir | 22 ++-- .../SparseTensor/convert_sparse2sparse.mlir | 10 +- .../SparseTensor/rewriting_for_codegen.mlir | 11 +- .../SparseTensor/sparse_concat_codegen.mlir | 27 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 104 ++++++++------- 8 files changed, 181 insertions(+), 120 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 4563a054ec160..9d8cf37befd49 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -603,9 +603,12 @@ void ForeachOp::build( std::fill_n(std::back_inserter(blockArgTypes), rank, builder.getIndexType()); // Followed by one value. blockArgTypes.push_back(rtp.getElementType()); + // Followed by reduction variable. 
+ blockArgTypes.append(initArgs.getTypes().begin(), initArgs.getTypes().end()); SmallVector blockArgLocs; - std::fill_n(std::back_inserter(blockArgLocs), rank + 1, tensor.getLoc()); + std::fill_n(std::back_inserter(blockArgLocs), blockArgTypes.size(), + tensor.getLoc()); OpBuilder::InsertionGuard guard(builder); auto ®ion = *result.regions.front(); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 85b0dd7601157..27b7acbd322dc 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -956,6 +956,9 @@ Value mlir::sparse_tensor::genValueForDense(OpBuilder &builder, Location loc, return val; } +// FIXME: +// 1. Dense tensors loop should be generated by loop emitter. +// 2. Support reduction variables to propagate SSA chains properly. void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop( OpBuilder &builder, Location loc, Value src, unsigned rank, function_ref bodyBuilder) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 7747fd73aa9bb..d0613c09503c0 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -356,8 +356,8 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { RankedTensorType cooTp = getUnorderedCOOFromType(dstTp); auto cooBuffer = rewriter.create(loc, cooTp, dstDynSizes).getResult(); - rewriter.create( - loc, srcTensor, llvm::None, + ForeachOp foreachOp = rewriter.create( + loc, srcTensor, cooBuffer, [&](OpBuilder &builder, Location loc, ValueRange args, Value v, ValueRange reduc) { SmallVector srcIndices; @@ -368,11 +368,11 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { } translateIndicesArray(builder, loc, op.getReassociationIndices(), 
srcIndices, srcSizes, dstSizes, dstIndices); - builder.create(loc, v, cooBuffer, dstIndices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), dstIndices); + builder.create(loc, t); }); - - rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); + auto t = rewriter.create(loc, foreachOp.getResult(0), true); + rewriter.replaceOpWithNewOp(op, dstTp, t); return success(); } }; @@ -442,13 +442,14 @@ struct ConcatenateRewriter : public OpRewritePattern { rewriter.create(loc, cooTp, ValueRange()).getResult(); Value offset = constantIndex(rewriter, loc, 0); + ForeachOp foreachOp; for (Value input : op.getInputs()) { // Builds the indexing map. // Build a for op for each input tensor to append new values into the // output tensor. - rewriter.create( - loc, input, llvm::None, + foreachOp = rewriter.create( + loc, input, cooBuffer, [&](OpBuilder &builder, Location loc, ValueRange args, Value v, ValueRange reduc) { SmallVector indices; @@ -461,8 +462,8 @@ struct ConcatenateRewriter : public OpRewritePattern { idx = builder.create(loc, idx, offset); indices.push_back(idx); } - builder.create(loc, v, cooBuffer, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); // Accumulates the offset. 
Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -471,7 +472,10 @@ struct ConcatenateRewriter : public OpRewritePattern { assert(!ShapedType::isDynamic(d)); offset = rewriter.create(loc, offset, constantIndex(rewriter, loc, d)); + cooBuffer = foreachOp.getResult(0); } + + cooBuffer = rewriter.create(loc, cooBuffer, true); rewriter.replaceOpWithNewOp(op, rtp, cooBuffer); return success(); } @@ -602,8 +606,8 @@ struct ConvertRewriter : public OpRewritePattern { srcTp = getUnorderedCOOFromType(srcTp); tmpCoo = rewriter.create(loc, srcTp, dynSrcSizes).getResult(); - rewriter.create( - loc, src, llvm::None, + auto foreachOp = rewriter.create( + loc, src, tmpCoo, [&](OpBuilder &builder, Location loc, ValueRange args, Value v, ValueRange reduc) { SmallVector indices; @@ -611,10 +615,10 @@ struct ConvertRewriter : public OpRewritePattern { uint64_t dim = toStoredDim(encSrc, i); indices.push_back(args[dim]); } - builder.create(loc, v, tmpCoo, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); - src = tmpCoo; + src = rewriter.create(loc, foreachOp.getResult(0), true); } // Sort the COO tensor so that its elements are ordered via increasing @@ -653,29 +657,31 @@ struct ConvertRewriter : public OpRewritePattern { getDynamicSizes(dstTp, srcSizes, dynDstSizes); Value dst = rewriter.create(loc, dstTp, dynDstSizes).getResult(); - rewriter.create(loc, src, llvm::None, - [&](OpBuilder &builder, Location loc, - ValueRange args, Value v, ValueRange reduc) { - SmallVector indices; - for (int64_t i = 0, e = srcTp.getRank(); i < e; - i++) { - uint64_t dim = toStoredDim(encDst, i); - indices.push_back(args[dim]); - } - builder.create(loc, v, dst, indices); - builder.create(loc); - }); + auto foreachOp = rewriter.create( + loc, src, dst, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { + SmallVector indices; 
+ for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { + uint64_t dim = toStoredDim(encDst, i); + indices.push_back(args[dim]); + } + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); + }); - // Release the temporary COO if it is created. + // Release the temporary COO if it is created. Note that tmpCoo is + // invalidated due to foreach and updated to src. if (tmpCoo) - rewriter.create(loc, tmpCoo); + rewriter.create(loc, src); // Directly replace op with dst results in bufferization error message // "sparse tensor allocation should not escape function". // As such, we insert a trivial tensor convert which will be removed by // codegen. rewriter.setInsertionPointAfter(op); - rewriter.replaceOpWithNewOp(op, dstTp, dst); + auto t = rewriter.create(loc, foreachOp.getResult(0), true); + rewriter.replaceOpWithNewOp(op, dstTp, t); return success(); } }; @@ -694,6 +700,8 @@ struct ForeachRewriter : public OpRewritePattern { int64_t rank = rtp.getRank(); auto enc = getSparseTensorEncoding(rtp); + SmallVector reduc = op.getInitArgs(); + // 1. Generates loop for the sparse input. SparseTensorLoopEmitter loopEmitter(ValueRange{input}); loopEmitter.initializeLoopEmit(rewriter, loc); @@ -701,7 +709,9 @@ struct ForeachRewriter : public OpRewritePattern { // TODO: provide utility function for loop sequences that only contains // one for loop? loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast(i)); - loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i); + // Note that reduc will be taken care of by loop emitter and get updated + // in place. + loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i, reduc); } SmallVector coords; @@ -716,15 +726,7 @@ struct ForeachRewriter : public OpRewritePattern { : rewriter.create(loc, vals, coords); // 2. Inline the block in the foreach operator. - Block::iterator inlinePos = rewriter.getInsertionPoint(); Block *srcBlock = op.getBody(); - // Remove sparse_tensor.yield. 
- rewriter.eraseOp(srcBlock->getTerminator()); - - for (int64_t i = 0; i < rank; i++) { - loopEmitter.exitCurrentLoop(rewriter, loc); - loopEmitter.exitCurrentLoopSeq(); - } SmallVector args; // Remap coordinates. @@ -734,11 +736,33 @@ struct ForeachRewriter : public OpRewritePattern { } // Remap value. args.push_back(val); + // Remap reduction variables. + args.append(reduc); + + // Remove sparse_tensor.yield. + SmallVector reducValue = srcBlock->getTerminator()->getOperands(); + rewriter.eraseOp(srcBlock->getTerminator()); // Inline body. - rewriter.mergeBlockBefore(srcBlock, &*inlinePos, args); - // delete the foreach operator. - rewriter.eraseOp(op); + if (!reducValue.empty()) { + rewriter.mergeBlocks(srcBlock, rewriter.getBlock(), args); + } else { + // This is annoying, since scf.for inserts an implicit yield op when + // there is no reduction variable upon creation; in this case we need to + // merge the block *before* the yield op. + rewriter.mergeBlockBefore(srcBlock, &*rewriter.getInsertionPoint(), args); + } + + for (int64_t i = 0; i < rank; i++) { + // Link the reduction chain. Note that loop emitter updates the reducValue + // in place. + loopEmitter.exitCurrentLoop(rewriter, loc, reducValue); + loopEmitter.exitCurrentLoopSeq(); + } + + // Replace the foreach operator with the value returned by the outermost
+ rewriter.replaceOp(op, reducValue); return success(); } }; @@ -801,7 +825,8 @@ struct NewRewriter : public OpRewritePattern { .getResult(0); Type eltTp = dstTp.getElementType(); Value value = genAllocaScalar(rewriter, loc, eltTp); - scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1); + scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1, + ArrayRef(cooBuffer)); rewriter.setInsertionPointToStart(forOp.getBody()); SmallString<18> getNextFuncName{"getSparseTensorReaderNext", @@ -816,13 +841,17 @@ struct NewRewriter : public OpRewritePattern { loc, indices, constantIndex(rewriter, loc, i))); } Value v = rewriter.create(loc, value); - rewriter.create(loc, v, cooBuffer, indicesArray); + auto t = rewriter.create(loc, v, forOp.getRegionIterArg(0), + indicesArray); + rewriter.create(loc, ArrayRef(t)); rewriter.setInsertionPointAfter(forOp); + // Link SSA chain. + cooBuffer = forOp.getResult(0); // Release the sparse tensor reader. createFuncCall(rewriter, loc, "delSparseTensorReader", {}, {reader}, EmitCInterface::Off); - + cooBuffer = rewriter.create(loc, cooBuffer, true); Value newOp = rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); // Release the unordered COO tensor buffer. diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir index d67e11b92dd9c..cb1f16ef2cd20 100644 --- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir @@ -116,6 +116,7 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V:.*]] = tensor.extract %[[A]]{{\[}}%[[FI]], %[[FJ]]] : tensor<2x4xf64> // CHECK-RWT: %[[NZ:.*]] = arith.cmpf une, %[[V]], %[[F0]] : f64 // CHECK-RWT: scf.if %[[NZ]] { +// // FIXME: the SSA chain is broken here! 
// CHECK-RWT: %{{.*}} = sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[FI]], %[[FJ]]] // CHECK-RWT: } // CHECK-RWT: } @@ -126,11 +127,13 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V2:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]], %[[I1]] jointly %[[V2]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: sparse_tensor.foreach in %[[COO]] -// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): -// CHECK-RWT: sparse_tensor.insert %[[FV]] into %[[DST]]{{\[}}%[[FI0]], %[[FI1]]] +// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64, %[[R0:.*]]: tensor +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.insert %[[FV]] into %[[R0]]{{\[}}%[[FI0]], %[[FI1]]] +// CHECK-RWT: sparse_tensor.yield %[[RET]] // CHECK-RWT: } -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.load %[[NEW_T]] hasInserts +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[NT]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { @@ -179,6 +182,7 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[I1r:.*]] = tensor.extract %[[SI]]{{\[}}%[[FI]], %[[C1]]] : tensor<2x2xi64> // CHECK-RWT: %[[I1:.*]] = arith.index_cast %[[I1r]] : i64 to index // CHECK-RWT: %[[V:.*]] = tensor.extract %[[SV]]{{\[}}%[[FI]]] : tensor<2xf32> +// // FIXME: the SSA chain is broken here! 
// CHECK-RWT: sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[I0]], %[[I1]]] // CHECK-RWT: } // CHECK-RWT: %[[TI0:.*]] = sparse_tensor.indices %[[COO]] {dimension = 0 : index} @@ -187,11 +191,13 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[TV:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[TI0]], %[[TI1]] jointly %[[TV]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: sparse_tensor.foreach in %[[COO]] -// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32): -// CHECK-RWT: sparse_tensor.insert %[[F2V]] into %[[DST]]{{\[}}%[[F2I0]], %[[F2I1]]] +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32, %[[R0:.*]]: tensor +// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.insert %[[F2V]] into %[[R0]]{{\[}}%[[F2I0]], %[[F2I1]]] +// CHECK-RWT: sparse_tensor.yield %[[NEW_T]] // CHECK-RWT: } -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] +// CHECK-RWT: %[[T:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[T]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<8x7xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{ diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir index 92f9e46b90938..17145f8d37380 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir @@ -94,11 +94,13 @@ func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[A]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]] jointly %[[V]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor(%[[D]]) -// 
CHECK-RWT: sparse_tensor.foreach in %[[A]] -// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32): -// CHECK-RWT: sparse_tensor.insert %[[FV2]] into %[[DST]]{{\[}}%[[FI2]]] +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[A]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32, %[[T:.*]]: tensor> func.func @sparse_convert(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir index 79b616dec8304..3a6cf999df90a 100644 --- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir @@ -18,18 +18,19 @@ // CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) // CHECK: %[[VB:.*]] = memref.alloca() -// CHECK: scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] { +// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) // CHECK: %[[E0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[E1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[V:.*]] = memref.load %[[VB]][] -// CHECK: sparse_tensor.insert %[[V]] into %[[T]]{{\[}}%[[E0]], %[[E1]]] +// CHECK: %[[T1:.*]] = sparse_tensor.insert %[[V]] into %[[A2]]{{\[}}%[[E0]], %[[E1]]] +// CHECK: scf.yield %[[T1]] // CHECK: } // CHECK: call @delSparseTensorReader(%[[R]]) -// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T]] -// CHECK: bufferization.dealloc_tensor %[[T]] +// CHECK: %[[T3:.*]] = sparse_tensor.load %[[T2]] hasInserts +// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T3]] +// CHECK: bufferization.dealloc_tensor %[[T3]] // CHECK: return %[[R]] -// CHECK: } func.func @sparse_new(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff 
--git a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir index 7280c6f5e7ba3..717819bd0cb16 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir @@ -19,16 +19,18 @@ // CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] { +// CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_1:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_1]] // CHECK: } +// CHECK: scf.yield %[[RET_4]] // CHECK: } // CHECK: %[[TMP_8:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_9:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, 
#sparse_tensor @@ -37,17 +39,19 @@ // CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] { +// CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_5:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A3:.*]] = %[[A2]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_2:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_2]] // CHECK: } +// CHECK: scf.yield %[[RET_5]] // CHECK: } // CHECK: %[[TMP_15:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_16:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor @@ -56,19 +60,22 @@ // CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref // 
CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] { +// CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_6:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A5:.*]] = %[[A4]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_3:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_3]] // CHECK: } +// CHECK: scf.yield %[[RET_6]] // CHECK: } -// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_0]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts +// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_23]] : tensor<9x4xf64, #sparse_tensor // CHECK: return %[[TMP_22]] : tensor<9x4xf64, #sparse_tensor func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index c162bacffac96..94ee50197fa9c 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ 
b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -52,14 +52,16 @@ // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]]) // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[DI0:.*]] = arith.divui %[[SI]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = arith.remui %[[SI]], %[[C10]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: scf.yield %[[NT:.*]] // CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor<10x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> // func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10xf64, #SparseMatrix> { @@ -111,25 +113,28 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor() // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = 
memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { -// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index -// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] -// CHECK-RWT } -// CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[A0:.*]] = %[[B]]) +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[A1:.*]] = %[[A0]]) +// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index +// CHECK-RWT: %[[DI:.*]] = 
arith.addi %[[T]], %[[SI1]] : index +// CHECK-RWT: %[[R1:.*]] = sparse_tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]] +// CHECK-RWT scf.yield %[[R1]] +// CHECK-RWT } +// CHECK-RWT scf.yield %[[RET_1]] +// CHECK-RWT: } +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor<100xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> // func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<100xf64, #SparseVector> { @@ -191,7 +196,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]]) // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[T1:.*]] = arith.muli %[[DD0]], %[[C10]] : index @@ -200,9 +205,11 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[T3:.*]] = arith.remui %[[SI]], %[[T2]] : index // CHECK-RWT: %[[T4:.*]] = arith.divui %[[T2]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = arith.divui %[[T3]], %[[T4]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: scf.yield %[[NT]] // CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor> // func.func 
@dynamic_sparse_expand(%arg0: tensor) -> tensor { @@ -260,28 +267,31 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor(%[[DD0]]) // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { -// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index -// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index -// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index -// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index -// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] -// CHECK-RWT } -// CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: 
%[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R0:.*]] = %[[B]]) +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[R1:.*]] = %[[R0]]) +// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index +// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index +// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index +// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index +// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]] +// CHECK-RWT scf.yield %[[NT]] +// CHECK-RWT } +// CHECK-RWT scf.yield %[[RET_1]] +// CHECK-RWT: } +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor> // func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor { From d6b224edd3f0c8f925d2435b487e5ea4a1ccc29d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 4 Nov 2022 12:59:55 -0700 Subject: [PATCH 298/516] [RISCV] Rename isAllUsesReadW to hasAllWUsers in RISCVSExtWRemoval. 
NFC --- llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index 14115982313a3..14d7afb512e1a 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -73,8 +73,7 @@ static void addUses(const MachineInstr &MI, // returns true if all uses of OrigMI only depend on the lower word of its // output, so we can transform OrigMI to the corresponding W-version. // TODO: handle multiple interdependent transformations -static bool isAllUsesReadW(const MachineInstr &OrigMI, - MachineRegisterInfo &MRI) { +static bool hasAllWUsers(const MachineInstr &OrigMI, MachineRegisterInfo &MRI) { SmallPtrSet Visited; SmallVector Worklist; @@ -258,7 +257,7 @@ static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI, case RISCV::ADDI: if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0) return true; - if (isAllUsesReadW(MI, MRI)) { + if (hasAllWUsers(MI, MRI)) { // transform to ADDIW FixableDef.insert(&MI); return true; @@ -286,7 +285,7 @@ static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI, case RISCV::LWU: case RISCV::MUL: case RISCV::SUB: - if (isAllUsesReadW(MI, MRI)) { + if (hasAllWUsers(MI, MRI)) { FixableDef.insert(&MI); return true; } From a4b543a5a541eeff5a7ba92ada4b0d809a2c8482 Mon Sep 17 00:00:00 2001 From: Matthew Voss Date: Fri, 4 Nov 2022 17:08:54 -0700 Subject: [PATCH 299/516] [llvm-profdata] Check for all duplicate entries in MemOpSize table Previously, we only checked for duplicate zero entries when merging a MemOPSize table (see D92074), but a user recently provided a reproducer demonstrating that other entries can also be duplicated. As demonstrated by the test in this patch, PGOMemOPSizeOpt can potentially generate invalid IR for non-zero, non-consecutive duplicate entries. 
This seems to be a rare case, since the duplicate entry is often below the threshold, but possible. This patch extends the existing warning to check for any duplicate values in the table, both in the optimization and in llvm-profdata. Differential Revision: https://reviews.llvm.org/D136211 --- llvm/lib/ProfileData/InstrProfWriter.cpp | 9 +++------ .../Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 11 ++++------- llvm/test/Transforms/PGOProfile/consecutive-zeros.ll | 3 +-- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index cd4e8900c9637..de632695ca499 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -530,13 +530,10 @@ Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) { for (uint32_t S = 0; S < NS; S++) { uint32_t ND = Func.getNumValueDataForSite(VK, S); std::unique_ptr VD = Func.getValueForSite(VK, S); - bool WasZero = false; + DenseSet SeenValues; for (uint32_t I = 0; I < ND; I++) - if ((VK != IPVK_IndirectCallTarget) && (VD[I].Value == 0)) { - if (WasZero) - return make_error(instrprof_error::invalid_prof); - WasZero = true; - } + if ((VK != IPVK_IndirectCallTarget) && !SeenValues.insert(VD[I].Value).second) + return make_error(instrprof_error::invalid_prof); } } diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 07c03ee2049ac..267446bddcf5f 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -291,9 +291,9 @@ bool MemOPSizeOpt::perform(MemOp MO) { uint64_t SavedRemainCount = SavedTotalCount; SmallVector SizeIds; SmallVector CaseCounts; + SmallDenseSet SeenSizeId; uint64_t MaxCount = 0; unsigned Version = 0; - int64_t LastV = -1; // Default case is in the front -- save the slot here. 
CaseCounts.push_back(0); SmallVector RemainingVDs; @@ -316,15 +316,12 @@ bool MemOPSizeOpt::perform(MemOp MO) { break; } - if (V == LastV) { - LLVM_DEBUG(dbgs() << "Invalid Profile Data in Function " << Func.getName() - << ": Two consecutive, identical values in MemOp value" - "counts.\n"); + if (!SeenSizeId.insert(V).second) { + errs() << "Invalid Profile Data in Function " << Func.getName() + << ": Two identical values in MemOp value counts.\n"; return false; } - LastV = V; - SizeIds.push_back(V); CaseCounts.push_back(C); if (C > MaxCount) diff --git a/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll b/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll index a388cbc37d5a6..6634838d21e1a 100644 --- a/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll +++ b/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll @@ -1,6 +1,5 @@ -; REQUIRES: asserts ; RUN: llvm-profdata merge %S/Inputs/consecutive-zeros.proftext -o %t.profdata -; RUN: opt < %s -debug -passes=pgo-instr-use,pgo-memop-opt -pgo-memop-count-threshold=0 -pgo-memop-percent-threshold=0 -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-use,pgo-memop-opt -pgo-memop-count-threshold=0 -pgo-memop-percent-threshold=0 -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" From eace13928be495d0f9f8ea3e730bea57ddd9fe13 Mon Sep 17 00:00:00 2001 From: Jennifer Yu Date: Fri, 4 Nov 2022 17:15:14 -0700 Subject: [PATCH 300/516] Back out test that failed. But I can not reproduce the problem on my local machine. 
My local machine run: 222 0x5a6780 222 0x7fffbef9400e 222 0x5a677e 0x5a6780 0x7fffbef936c8 222 0x376f8e 0x376f90 0x7fffbef94008 222 0x281f20 222 0x7fffbef9400e PASSED --- .../test/mapping/has_device_addr.cpp | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/openmp/libomptarget/test/mapping/has_device_addr.cpp b/openmp/libomptarget/test/mapping/has_device_addr.cpp index 8a17228301463..63fa313cf2780 100644 --- a/openmp/libomptarget/test/mapping/has_device_addr.cpp +++ b/openmp/libomptarget/test/mapping/has_device_addr.cpp @@ -25,60 +25,9 @@ struct view { } }; -void poo() { - short a = 1; - short &ar = a; - -#pragma omp target data map(tofrom : ar) use_device_addr(ar) - { -#pragma omp target has_device_addr(ar) - { - ar = 222; - // CHECK: 222 - printf("%hd %p\n", ar, &ar); // 222 p2 - } - } - // CHECK: 222 - printf("%hd %p\n", ar, &ar); // 222 p1 -} - -void noo() { - short *b = (short *)malloc(sizeof(short)); - short *&br = b; - br = br - 1; - - br[1] = 111; -#pragma omp target data map(tofrom : br[1]) use_device_addr(br[1]) -#pragma omp target has_device_addr(br[1]) - { - br[1] = 222; - // CHECK: 222 - printf("%hd %p %p %p\n", br[1], br, &br[1], &br); - } - // CHECK: 222 - printf("%hd %p %p %p\n", br[1], br, &br[1], &br); -} - -void ooo() { - short a = 1; - -#pragma omp target data map(tofrom : a) use_device_addr(a) -#pragma omp target has_device_addr(a) - { - a = 222; - // CHECK: 222 - printf("%hd %p\n", a, &a); - } - // CHECK: 222 - printf("%hd %p\n", a, &a); -} - int main() { view a; a.foo(); - poo(); - noo(); - ooo(); // CHECK: PASSED printf("PASSED\n"); } From 0faac15089af4f75172e48b2580dfc46eedcc3b7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 4 Nov 2022 17:26:16 -0700 Subject: [PATCH 301/516] [RISCV] Remove duplicate SDNodeXForm. 
NFC --- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 28 +++++++++-------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 50ecd8b8da998..90ae2ac398436 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -101,12 +101,6 @@ def BSETINVTwoBitsMask : PatLeaf<(imm), [{ return countPopulation(N->getZExtValue()) == 2; }]>; -def TrailingZerosXForm : SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant(countTrailingZeros(I), SDLoc(N), - N->getValueType(0)); -}]>; - def BSETINVTwoBitsMaskHigh : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(63 - countLeadingZeros(I), SDLoc(N), @@ -562,10 +556,10 @@ def : Pat<(seteq (and GPR:$rs1, SingleBitSetMask:$mask), 0), (BEXTI (XORI GPR:$rs1, -1), SingleBitSetMask:$mask)>; def : Pat<(or GPR:$r, BSETINVTwoBitsMask:$i), - (BSETI (BSETI GPR:$r, (TrailingZerosXForm BSETINVTwoBitsMask:$i)), + (BSETI (BSETI GPR:$r, (TrailingZeros BSETINVTwoBitsMask:$i)), (BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>; def : Pat<(xor GPR:$r, BSETINVTwoBitsMask:$i), - (BINVI (BINVI GPR:$r, (TrailingZerosXForm BSETINVTwoBitsMask:$i)), + (BINVI (BINVI GPR:$r, (TrailingZeros BSETINVTwoBitsMask:$i)), (BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>; def : Pat<(or GPR:$r, BSETINVORIMask:$i), (BSETI (ORI GPR:$r, (BSETINVORIMaskLow BSETINVORIMask:$i)), @@ -702,13 +696,13 @@ def : Pat<(add GPR:$r, CSImm12MulBy8:$i), def : Pat<(mul GPR:$r, C3LeftShift:$i), (SLLI (SH1ADD GPR:$r, GPR:$r), - (TrailingZerosXForm C3LeftShift:$i))>; + (TrailingZeros C3LeftShift:$i))>; def : Pat<(mul GPR:$r, C5LeftShift:$i), (SLLI (SH2ADD GPR:$r, GPR:$r), - (TrailingZerosXForm C5LeftShift:$i))>; + (TrailingZeros C5LeftShift:$i))>; def : Pat<(mul GPR:$r, C9LeftShift:$i), (SLLI (SH3ADD GPR:$r, GPR:$r), - (TrailingZerosXForm C9LeftShift:$i))>; + (TrailingZeros C9LeftShift:$i))>; def : Pat<(mul_const_oneuse 
GPR:$r, (XLenVT 11)), (SH1ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>; @@ -771,14 +765,14 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)), (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; def : Pat<(mul (binop_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i), - (SH1ADD (SLLI_UW GPR:$r, (TrailingZerosXForm C3LeftShiftUW:$i)), - (SLLI_UW GPR:$r, (TrailingZerosXForm C3LeftShiftUW:$i)))>; + (SH1ADD (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i)), + (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i)))>; def : Pat<(mul (binop_oneuse GPR:$r, 0xFFFFFFFF), C5LeftShiftUW:$i), - (SH2ADD (SLLI_UW GPR:$r, (TrailingZerosXForm C5LeftShiftUW:$i)), - (SLLI_UW GPR:$r, (TrailingZerosXForm C5LeftShiftUW:$i)))>; + (SH2ADD (SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i)), + (SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i)))>; def : Pat<(mul (binop_oneuse GPR:$r, 0xFFFFFFFF), C9LeftShiftUW:$i), - (SH3ADD (SLLI_UW GPR:$r, (TrailingZerosXForm C9LeftShiftUW:$i)), - (SLLI_UW GPR:$r, (TrailingZerosXForm C9LeftShiftUW:$i)))>; + (SH3ADD (SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i)), + (SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i)))>; } // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbcOrZbkc] in { From 91aa233bdf9da29391e107e0575a733deb38c4c2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 4 Nov 2022 17:49:08 -0700 Subject: [PATCH 302/516] [RISCV] Add isel pattern for (and X, 0xffffffff << C) with Zba. We can use SRLI by C to clear LSBs followed by a SLLI_UW. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 16 ++++++++++++++ llvm/test/CodeGen/RISCV/rv64zba.ll | 27 +++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 90ae2ac398436..61476dd43d296 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -242,6 +242,16 @@ def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ return !C || !isInt<12>(C->getSExtValue()); }]>; +def Shifted32OnesMask : PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + if (!isShiftedMask_64(Imm)) + return false; + + unsigned TrailingZeros = countTrailingZeros(Imm); + return TrailingZeros > 0 && TrailingZeros < 32 && + Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros; +}], TrailingZeros>; + def sh1add_op : ComplexPattern; def sh2add_op : ComplexPattern; def sh3add_op : ComplexPattern; @@ -731,6 +741,12 @@ def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)), let Predicates = [HasStdExtZba, IsRV64] in { def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)), (SLLI_UW GPR:$rs1, uimm5:$shamt)>; +// Match a shifted 0xffffffff mask. Use SRLI to clear the LSBs and SLLI_UW to +// mask and shift. 
+def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)), + (SLLI_UW (SRLI GPR:$rs1, Shifted32OnesMask:$mask), + Shifted32OnesMask:$mask)>; + def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)), (ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, X0)>; diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index e9161ae8d69d0..6fa4e37782b37 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -1408,6 +1408,33 @@ define signext i16 @srliw_1_sh1add(i16* %0, i32 signext %1) { ret i16 %6 } +define i128 @slliuw_ptrdiff(i64 %diff, i128* %baseptr) { +; RV64I-LABEL: slliuw_ptrdiff: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: slli a2, a2, 36 +; RV64I-NEXT: addi a2, a2, -16 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a1, a1, a0 +; RV64I-NEXT: ld a0, 0(a1) +; RV64I-NEXT: ld a1, 8(a1) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: slliuw_ptrdiff: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srli a0, a0, 4 +; RV64ZBA-NEXT: slli.uw a0, a0, 4 +; RV64ZBA-NEXT: add a1, a1, a0 +; RV64ZBA-NEXT: ld a0, 0(a1) +; RV64ZBA-NEXT: ld a1, 8(a1) +; RV64ZBA-NEXT: ret + %ptrdiff = lshr exact i64 %diff, 4 + %cast = and i64 %ptrdiff, 4294967295 + %ptr = getelementptr inbounds i128, i128* %baseptr, i64 %cast + %res = load i128, i128* %ptr + ret i128 %res +} + define signext i32 @srliw_2_sh2add(i32* %0, i32 signext %1) { ; RV64I-LABEL: srliw_2_sh2add: ; RV64I: # %bb.0: From 200266a0a12b0af77967c8770e1e1bdd634bfc4a Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Thu, 3 Nov 2022 19:14:52 +0000 Subject: [PATCH 303/516] [mlir][MemRef] Fix the lowering of extract_strided_metadata The first result of the extract_strided_metadata operation is a MemRef, not a naked pointer. This patch fixes the lowering of this operation in MemRefToLLVM so that we properly materialize the full MemRef structure and not just the base, naked, pointer. 
Differential Revision: https://reviews.llvm.org/D137364 --- .../include/mlir/Conversion/LLVMCommon/MemRefBuilder.h | 4 ++++ mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp | 8 +++++++- mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp | 10 ++++++++-- mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir | 6 ++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h index ac54ee6888136..2b7735da84666 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h @@ -43,6 +43,10 @@ class MemRefDescriptor : public StructBuilder { static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, MemRefType type, Value memory); + static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, + LLVMTypeConverter &typeConverter, + MemRefType type, Value memory, + Value alignedMemory); /// Builds IR extracting the allocated pointer from the descriptor. Value allocatedPtr(OpBuilder &builder, Location loc); diff --git a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp index a65ac51c31c63..4f72cd1081f0e 100644 --- a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp +++ b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp @@ -43,6 +43,12 @@ MemRefDescriptor MemRefDescriptor::fromStaticShape(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, MemRefType type, Value memory) { + return fromStaticShape(builder, loc, typeConverter, type, memory, memory); +} + +MemRefDescriptor MemRefDescriptor::fromStaticShape( + OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, + MemRefType type, Value memory, Value alignedMemory) { assert(type.hasStaticShape() && "unexpected dynamic shape"); // Extract all strides and offsets and verify they are static. 
@@ -61,7 +67,7 @@ MemRefDescriptor::fromStaticShape(OpBuilder &builder, Location loc, auto descr = MemRefDescriptor::undef(builder, loc, convertedType); descr.setAllocatedPtr(builder, loc, memory); - descr.setAlignedPtr(builder, loc, memory); + descr.setAlignedPtr(builder, loc, alignedMemory); descr.setConstantOffset(builder, loc, offset); // Fill in sizes and strides diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp index d63b84ccdf856..4685590fa1d32 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -2115,7 +2115,7 @@ class ExtractStridedMetadataOpLowering return failure(); // Create the descriptor. - MemRefDescriptor sourceMemRef(adaptor.getOperands().front()); + MemRefDescriptor sourceMemRef(adaptor.getSource()); Location loc = extractStridedMetadataOp.getLoc(); Value source = extractStridedMetadataOp.getSource(); @@ -2125,7 +2125,13 @@ class ExtractStridedMetadataOpLowering results.reserve(2 + rank * 2); // Base buffer. - results.push_back(sourceMemRef.allocatedPtr(rewriter, loc)); + Value baseBuffer = sourceMemRef.allocatedPtr(rewriter, loc); + Value alignedBuffer = sourceMemRef.alignedPtr(rewriter, loc); + MemRefDescriptor dstMemRef = MemRefDescriptor::fromStaticShape( + rewriter, loc, *getTypeConverter(), + extractStridedMetadataOp.getBaseBuffer().getType().cast(), + baseBuffer, alignedBuffer); + results.push_back((Value)dstMemRef); // Offset. 
results.push_back(sourceMemRef.offset(rewriter, loc)); diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir index 12fd3f21e3da0..344e06db09b62 100644 --- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir @@ -1169,6 +1169,12 @@ func.func @extract_aligned_pointer_as_index(%m: memref) -> index { // CHECK-SAME: %[[ARG:.*]]: memref // CHECK: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : memref> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEM_DESC]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[ALIGNED_BASE:.*]] = llvm.extractvalue %[[MEM_DESC]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> +// CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BASE]], %[[DESC]][0] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[ALIGNED_BASE]], %[[DESC0]][1] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: %[[OFF0:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[BASE_BUFFER_DESC:.*]] = llvm.insertvalue %[[OFF0]], %[[DESC1]][2] : !llvm.struct<(ptr, ptr, i64)> // CHECK: %[[OFFSET:.*]] = llvm.extractvalue %[[MEM_DESC]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[SIZE0:.*]] = llvm.extractvalue %[[MEM_DESC]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[SIZE1:.*]] = llvm.extractvalue %[[MEM_DESC]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> From 38c219b4a8ebe30d781a1ebbb9a9d29b24c28b39 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Thu, 3 Nov 2022 16:06:59 -0700 Subject: [PATCH 304/516] [mlir] Infer SubElementInterface implementations using the storage KeyTy The KeyTy of attribute/type storage classes provide enough 
information for automatically implementing the necessary sub element interface methods. This removes the need for derived classes to do it themselves, which is both much nicer and easier to handle certain invariants (e.g. null handling). In cases where explicit handling of parameter types is necessary, they can provide an implementation of `AttrTypeSubElementHandler` to opt in to support. This tickles a few things alias-wise, which annoyingly messes with tests that hard-code specific affine map numbers. Differential Revision: https://reviews.llvm.org/D137374 --- flang/test/Fir/affine-promotion.fir | 22 +- mlir/docs/AttributesAndTypes.md | 7 + .../mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 16 +- mlir/include/mlir/IR/BuiltinAttributes.td | 8 +- .../mlir/IR/BuiltinLocationAttributes.td | 8 +- mlir/include/mlir/IR/Location.h | 20 ++ mlir/include/mlir/IR/StorageUniquerSupport.h | 6 +- mlir/include/mlir/IR/SubElementInterfaces.h | 247 ++++++++++++++++++ mlir/include/mlir/IR/SubElementInterfaces.td | 23 +- mlir/include/mlir/IR/TypeRange.h | 17 ++ mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp | 163 ------------ mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 66 ----- mlir/lib/IR/BuiltinAttributes.cpp | 70 ----- mlir/lib/IR/BuiltinTypes.cpp | 92 ------- mlir/lib/IR/Location.cpp | 66 ----- mlir/lib/IR/SubElementInterfaces.cpp | 14 - mlir/lib/IR/TypeDetail.h | 10 +- mlir/test/Dialect/Affine/loop-tiling.mlir | 4 +- .../Dialect/Linalg/drop-unit-extent-dims.mlir | 6 +- .../Dialect/MemRef/fold-memref-alias-ops.mlir | 2 +- .../Dialect/SCF/for-loop-specialization.mlir | 2 +- .../SCF/parallel-loop-specialization.mlir | 4 +- .../Dialect/SCF/parallel-loop-tiling.mlir | 10 +- mlir/test/Dialect/SparseTensor/roundtrip.mlir | 2 +- mlir/test/IR/affine-map.mlir | 4 - mlir/test/IR/memory-ops.mlir | 10 +- mlir/test/Transforms/loop-fusion-2.mlir | 6 +- .../Transforms/normalize-memrefs-ops.mlir | 8 +- mlir/test/lib/Dialect/Test/TestAttrDefs.td | 3 +- mlir/test/lib/Dialect/Test/TestAttributes.cpp | 14 - 
mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp | 7 + mlir/unittests/IR/SubElementInterfaceTest.cpp | 5 +- 32 files changed, 385 insertions(+), 557 deletions(-) diff --git a/flang/test/Fir/affine-promotion.fir b/flang/test/Fir/affine-promotion.fir index 4879e51a44512..aae35c6ef5659 100644 --- a/flang/test/Fir/affine-promotion.fir +++ b/flang/test/Fir/affine-promotion.fir @@ -50,21 +50,21 @@ func.func @loop_with_load_and_store(%a1: !arr_d1, %a2: !arr_d1, %a3: !arr_d1) { // CHECK: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK: %[[VAL_4:.*]] = arith.constant 100 : index // CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_6:.*]] = affine.apply #map(){{\[}}%[[VAL_3]], %[[VAL_4]]] +// CHECK: %[[VAL_6:.*]] = affine.apply #{{.*}}(){{\[}}%[[VAL_3]], %[[VAL_4]]] // CHECK: %[[VAL_7:.*]] = fir.alloca !fir.array, %[[VAL_6]] // CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_0]] : (!fir.ref>) -> memref // CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> memref // CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_7]] : (!fir.ref>) -> memref -// CHECK: affine.for %[[VAL_11:.*]] = %[[VAL_3]] to #map1(){{\[}}%[[VAL_4]]] { -// CHECK: %[[VAL_12:.*]] = affine.apply #map2(%[[VAL_11]]){{\[}}%[[VAL_3]], %[[VAL_4]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_11:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_4]]] { +// CHECK: %[[VAL_12:.*]] = affine.apply #{{.*}}(%[[VAL_11]]){{\[}}%[[VAL_3]], %[[VAL_4]], %[[VAL_3]]] // CHECK: %[[VAL_13:.*]] = affine.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = affine.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32 // CHECK: affine.store %[[VAL_15]], %[[VAL_10]]{{\[}}%[[VAL_12]]] : memref // CHECK: } // CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> memref -// CHECK: affine.for %[[VAL_17:.*]] = %[[VAL_3]] to #map1(){{\[}}%[[VAL_4]]] { -// CHECK: %[[VAL_18:.*]] = affine.apply #map2(%[[VAL_17]]){{\[}}%[[VAL_3]], 
%[[VAL_4]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_17:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_4]]] { +// CHECK: %[[VAL_18:.*]] = affine.apply #{{.*}}(%[[VAL_17]]){{\[}}%[[VAL_3]], %[[VAL_4]], %[[VAL_3]]] // CHECK: %[[VAL_19:.*]] = affine.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = affine.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_21:.*]] = arith.mulf %[[VAL_19]], %[[VAL_20]] : f32 @@ -114,18 +114,18 @@ func.func @loop_with_if(%a: !arr_d1, %v: f32) { // CHECK: %[[VAL_5:.*]] = arith.constant 100 : index // CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_0]] : (!fir.ref>) -> memref -// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_3]] to #map(){{\[}}%[[VAL_5]]] { -// CHECK: %[[VAL_9:.*]] = affine.apply #map1(%[[VAL_8]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_5]]] { +// CHECK: %[[VAL_9:.*]] = affine.apply #{{.*}}(%[[VAL_8]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] // CHECK: affine.store %[[VAL_1]], %[[VAL_7]]{{\[}}%[[VAL_9]]] : memref // CHECK: } -// CHECK: affine.for %[[VAL_10:.*]] = %[[VAL_3]] to #map(){{\[}}%[[VAL_5]]] { -// CHECK: %[[VAL_11:.*]] = affine.apply #map1(%[[VAL_10]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_10:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_5]]] { +// CHECK: %[[VAL_11:.*]] = affine.apply #{{.*}}(%[[VAL_10]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] // CHECK: affine.store %[[VAL_1]], %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref // CHECK: } -// CHECK: affine.for %[[VAL_12:.*]] = %[[VAL_3]] to #map(){{\[}}%[[VAL_5]]] { +// CHECK: affine.for %[[VAL_12:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_5]]] { // CHECK: %[[VAL_13:.*]] = arith.subi %[[VAL_12]], %[[VAL_4]] : index // CHECK: affine.if #set(%[[VAL_12]]) { -// CHECK: %[[VAL_14:.*]] = affine.apply #map1(%[[VAL_12]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] +// CHECK: %[[VAL_14:.*]] = 
affine.apply #{{.*}}(%[[VAL_12]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] // CHECK: affine.store %[[VAL_1]], %[[VAL_7]]{{\[}}%[[VAL_14]]] : memref // CHECK: } // CHECK: } diff --git a/mlir/docs/AttributesAndTypes.md b/mlir/docs/AttributesAndTypes.md index 7e54c2ee0cd1b..d19b1bf443ad7 100644 --- a/mlir/docs/AttributesAndTypes.md +++ b/mlir/docs/AttributesAndTypes.md @@ -959,6 +959,8 @@ User defined storage classes must adhere to the following: - Provide a method to hash an instance of the `KeyTy`. (Note: This is not necessary if an `llvm::DenseMapInfo` specialization exists) - `static llvm::hash_code hashKey(const KeyTy &)` +- Provide a method to generate the `KeyTy` from an instance of the storage class. + - `KeyTy getAsKey() const` Let's look at an example: @@ -997,6 +999,11 @@ struct ComplexTypeStorage : public TypeStorage { ComplexTypeStorage(key.first, key.second); } + /// Construct an instance of the key from this storage class. + KeyTy getAsKey() const { + return KeyTy(nonZeroParam, integerType); + } + /// The parametric data held by the storage class. 
unsigned nonZeroParam; Type integerType; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index b6b1f4c618ffd..0c689d0019f45 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -151,7 +151,7 @@ def LLVM_DIBasicTypeAttr : LLVM_Attr<"DIBasicType", "di_basic_type", //===----------------------------------------------------------------------===// def LLVM_DICompileUnitAttr : LLVM_Attr<"DICompileUnit", "di_compile_unit", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins LLVM_DILanguageParameter:$sourceLanguage, @@ -168,7 +168,7 @@ def LLVM_DICompileUnitAttr : LLVM_Attr<"DICompileUnit", "di_compile_unit", [ //===----------------------------------------------------------------------===// def LLVM_DICompositeTypeAttr : LLVM_Attr<"DICompositeType", "di_composite_type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DITypeAttr"> { let parameters = (ins LLVM_DITagParameter:$tag, @@ -188,7 +188,7 @@ def LLVM_DICompositeTypeAttr : LLVM_Attr<"DICompositeType", "di_composite_type", //===----------------------------------------------------------------------===// def LLVM_DIDerivedTypeAttr : LLVM_Attr<"DIDerivedType", "di_derived_type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DITypeAttr"> { let parameters = (ins LLVM_DITagParameter:$tag, @@ -220,7 +220,7 @@ def LLVM_DIFileAttr : LLVM_Attr<"DIFile", "di_file", /*traits=*/[], "DIScopeAttr //===----------------------------------------------------------------------===// def LLVM_DILexicalBlockAttr : LLVM_Attr<"DILexicalBlock", "di_lexical_block", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins "DIScopeAttr":$scope, @@ -244,7 +244,7 @@ def LLVM_DILexicalBlockAttr : LLVM_Attr<"DILexicalBlock", "di_lexical_block", [ 
//===----------------------------------------------------------------------===// def LLVM_DILexicalBlockFile : LLVM_Attr<"DILexicalBlockFile", "di_lexical_block_file", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins "DIScopeAttr":$scope, @@ -266,7 +266,7 @@ def LLVM_DILexicalBlockFile : LLVM_Attr<"DILexicalBlockFile", "di_lexical_block_ //===----------------------------------------------------------------------===// def LLVM_DILocalVariableAttr : LLVM_Attr<"DILocalVariable", "di_local_variable", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DINodeAttr"> { let parameters = (ins "DIScopeAttr":$scope, @@ -296,7 +296,7 @@ def LLVM_DILocalVariableAttr : LLVM_Attr<"DILocalVariable", "di_local_variable", //===----------------------------------------------------------------------===// def LLVM_DISubprogramAttr : LLVM_Attr<"DISubprogram", "di_subprogram", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins "DICompileUnitAttr":$compileUnit, @@ -346,7 +346,7 @@ def LLVM_DISubrangeAttr : LLVM_Attr<"DISubrange", "di_subrange", /*traits=*/[], //===----------------------------------------------------------------------===// def LLVM_DISubroutineTypeAttr : LLVM_Attr<"DISubroutineType", "di_subroutine_type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DITypeAttr"> { let parameters = (ins LLVM_DICallingConventionParameter:$callingConvention, diff --git a/mlir/include/mlir/IR/BuiltinAttributes.td b/mlir/include/mlir/IR/BuiltinAttributes.td index 06eb6cb5f0424..70f47323fc85f 100644 --- a/mlir/include/mlir/IR/BuiltinAttributes.td +++ b/mlir/include/mlir/IR/BuiltinAttributes.td @@ -72,7 +72,7 @@ def Builtin_AffineMapAttr : Builtin_Attr<"AffineMap", [ //===----------------------------------------------------------------------===// def Builtin_ArrayAttr : Builtin_Attr<"Array", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "A 
collection of other Attribute values"; let description = [{ @@ -510,7 +510,7 @@ def Builtin_DenseResourceElementsAttr : Builtin_Attr<"DenseResourceElements", [ //===----------------------------------------------------------------------===// def Builtin_DictionaryAttr : Builtin_Attr<"Dictionary", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An dictionary of named Attribute values"; let description = [{ @@ -1115,7 +1115,7 @@ def Builtin_StringAttr : Builtin_Attr<"String", [TypedAttrInterface]> { //===----------------------------------------------------------------------===// def Builtin_SymbolRefAttr : Builtin_Attr<"SymbolRef", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An Attribute containing a symbolic reference to an Operation"; let description = [{ @@ -1190,7 +1190,7 @@ def Builtin_SymbolRefAttr : Builtin_Attr<"SymbolRef", [ //===----------------------------------------------------------------------===// def Builtin_TypeAttr : Builtin_Attr<"Type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An Attribute containing a Type"; let description = [{ diff --git a/mlir/include/mlir/IR/BuiltinLocationAttributes.td b/mlir/include/mlir/IR/BuiltinLocationAttributes.td index ca96fb9e53bbc..0395e13295904 100644 --- a/mlir/include/mlir/IR/BuiltinLocationAttributes.td +++ b/mlir/include/mlir/IR/BuiltinLocationAttributes.td @@ -29,7 +29,7 @@ class Builtin_LocationAttr traits = []> //===----------------------------------------------------------------------===// def CallSiteLoc : Builtin_LocationAttr<"CallSiteLoc", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "A callsite source location"; let description = [{ @@ -108,7 +108,7 @@ def FileLineColLoc : Builtin_LocationAttr<"FileLineColLoc"> { //===----------------------------------------------------------------------===// def FusedLoc : Builtin_LocationAttr<"FusedLoc", [ - DeclareAttrInterfaceMethods + 
SubElementAttrInterface ]> { let summary = "A tuple of other source locations"; let description = [{ @@ -149,7 +149,7 @@ def FusedLoc : Builtin_LocationAttr<"FusedLoc", [ //===----------------------------------------------------------------------===// def NameLoc : Builtin_LocationAttr<"NameLoc", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "A named source location"; let description = [{ @@ -188,7 +188,7 @@ def NameLoc : Builtin_LocationAttr<"NameLoc", [ //===----------------------------------------------------------------------===// def OpaqueLoc : Builtin_LocationAttr<"OpaqueLoc", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An opaque source location"; let description = [{ diff --git a/mlir/include/mlir/IR/Location.h b/mlir/include/mlir/IR/Location.h index 03f6e4e55896e..b772cf4b90e39 100644 --- a/mlir/include/mlir/IR/Location.h +++ b/mlir/include/mlir/IR/Location.h @@ -107,6 +107,9 @@ class Location { return LocationAttr(reinterpret_cast(pointer)); } + /// Support llvm style casting. + static bool classof(Attribute attr) { return llvm::isa(attr); } + protected: /// The internal backing location attribute. LocationAttr impl; @@ -167,6 +170,23 @@ inline OpaqueLoc OpaqueLoc::get(T underlyingLocation, MLIRContext *context) { return get(reinterpret_cast(underlyingLocation), TypeID::get(), UnknownLoc::get(context)); } + +//===----------------------------------------------------------------------===// +// SubElementInterfaces +//===----------------------------------------------------------------------===// + +/// Enable locations to be introspected as sub-elements. 
+template <> +struct AttrTypeSubElementHandler { + static void walk(Location param, AttrTypeSubElementWalker &walker) { + walker.walk(param); + } + static Location replace(Location param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return cast(attrRepls.take_front(1)[0]); + } +}; + } // namespace mlir //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/StorageUniquerSupport.h b/mlir/include/mlir/IR/StorageUniquerSupport.h index 074764caf33b1..ff5a0630e4fff 100644 --- a/mlir/include/mlir/IR/StorageUniquerSupport.h +++ b/mlir/include/mlir/IR/StorageUniquerSupport.h @@ -180,6 +180,9 @@ class StorageUserBase : public BaseT, public Traits... { return ConcreteT((const typename BaseT::ImplType *)ptr); } + /// Utility for easy access to the storage instance. + ImplType *getImpl() const { return static_cast(this->impl); } + protected: /// Mutate the current storage instance. This will not change the unique key. /// The arguments are forwarded to 'ConcreteT::mutate'. @@ -199,9 +202,6 @@ class StorageUserBase : public BaseT, public Traits... { return success(); } - /// Utility for easy access to the storage instance. - ImplType *getImpl() const { return static_cast(this->impl); } - private: /// Trait to check if T provides a 'ConcreteEntity' type alias. 
template diff --git a/mlir/include/mlir/IR/SubElementInterfaces.h b/mlir/include/mlir/IR/SubElementInterfaces.h index 2c40e4edfa0fa..0f3045de5f86a 100644 --- a/mlir/include/mlir/IR/SubElementInterfaces.h +++ b/mlir/include/mlir/IR/SubElementInterfaces.h @@ -23,6 +23,253 @@ template using SubElementReplFn = function_ref; template using SubElementResultReplFn = function_ref(T)>; + +//===----------------------------------------------------------------------===// +/// AttrTypeSubElementHandler +//===----------------------------------------------------------------------===// + +/// This class is used by AttrTypeSubElementHandler instances to walk sub +/// attributes and types. +class AttrTypeSubElementWalker { +public: + AttrTypeSubElementWalker(function_ref walkAttrsFn, + function_ref walkTypesFn) + : walkAttrsFn(walkAttrsFn), walkTypesFn(walkTypesFn) {} + + /// Walk an attribute. + void walk(Attribute element) { + if (element) + walkAttrsFn(element); + } + /// Walk a type. + void walk(Type element) { + if (element) + walkTypesFn(element); + } + /// Walk a range of attributes or types. + template + void walkRange(RangeT &&elements) { + for (auto element : elements) + walk(element); + } + +private: + function_ref walkAttrsFn; + function_ref walkTypesFn; +}; + +/// This class is used by AttrTypeSubElementHandler instances to process sub +/// element replacements. +template +class AttrTypeSubElementReplacements { +public: + AttrTypeSubElementReplacements(ArrayRef repls) : repls(repls) {} + + /// Take the first N replacements as an ArrayRef, dropping them from + /// this replacement list. + ArrayRef take_front(unsigned n) { + ArrayRef elements = repls.take_front(n); + repls = repls.drop_front(n); + return elements; + } + +private: + /// The current set of replacements. 
+ ArrayRef repls; +}; +using AttrSubElementReplacements = AttrTypeSubElementReplacements; +using TypeSubElementReplacements = AttrTypeSubElementReplacements; + +/// This class provides support for interacting with the +/// SubElementInterfaces for different types of parameters. An +/// implementation of this class should be provided for any parameter class +/// that may contain an attribute or type. There are two main methods of +/// this class that need to be implemented: +/// +/// - walk +/// +/// This method should traverse into any sub elements of the parameter +/// using the provided walker, or by invoking handlers for sub-types. +/// +/// - replace +/// +/// This method should extract any necessary sub elements using the +/// provided replacer, or by invoking handlers for sub-types. The new +/// post-replacement parameter value should be returned. +/// +template +struct AttrTypeSubElementHandler { + /// Default walk implementation that does nothing. + static inline void walk(const T ¶m, AttrTypeSubElementWalker &walker) {} + + /// Default replace implementation just forwards the parameter. + template + static inline decltype(auto) replace(ParamT &¶m, + AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return std::forward(param); + } + + /// Tag indicating that this handler does not support sub-elements. + using DefaultHandlerTag = void; +}; + +/// Detect if any of the given parameter types has a sub-element handler. +namespace detail { +template +using has_default_sub_element_handler_t = decltype(T::DefaultHandlerTag); +} // namespace detail +template +inline constexpr bool has_sub_attr_or_type_v = + (!llvm::is_detected::value || + ...); + +/// Implementation for derived Attributes and Types. 
+template +struct AttrTypeSubElementHandler< + T, std::enable_if_t || + std::is_base_of_v>> { + static void walk(T param, AttrTypeSubElementWalker &walker) { + walker.walk(param); + } + static T replace(T param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + if (!param) + return T(); + if constexpr (std::is_base_of_v) { + return cast(attrRepls.take_front(1)[0]); + } else if constexpr (!detail::IsInterface::value && + std::is_base_of_v) { + return cast(typeRepls.take_front(1)[0]); + } + } +}; +template <> +struct AttrTypeSubElementHandler { + template + static void walk(T param, AttrTypeSubElementWalker &walker) { + walker.walk(param.getName()); + walker.walk(param.getValue()); + } + template + static T replace(T param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + ArrayRef paramRepls = attrRepls.take_front(2); + return T(cast(paramRepls[0]), paramRepls[1]); + } +}; +/// Implementation for derived ArrayRef. +template +struct AttrTypeSubElementHandler, + std::enable_if_t>> { + using EltHandler = AttrTypeSubElementHandler; + + static void walk(ArrayRef param, AttrTypeSubElementWalker &walker) { + for (const T &subElement : param) + EltHandler::walk(subElement, walker); + } + static auto replace(ArrayRef param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + // Normal attributes/types can extract using the replacer directly. + if constexpr (std::is_base_of_v && + sizeof(T) == sizeof(Attribute)) { + ArrayRef attrs = attrRepls.take_front(param.size()); + return ArrayRef((const T *)attrs.data(), attrs.size()); + } else if constexpr (std::is_base_of_v && + sizeof(T) == sizeof(Type)) { + ArrayRef types = typeRepls.take_front(param.size()); + return ArrayRef((const T *)types.data(), types.size()); + } else { + // Otherwise, we need to allocate storage for the new elements. 
+ SmallVector newElements; + for (const T &element : param) + newElements.emplace_back( + EltHandler::replace(element, attrRepls, typeRepls)); + return newElements; + } + } +}; +/// Implementation for Tuple. +template +struct AttrTypeSubElementHandler< + std::tuple, std::enable_if_t>> { + static void walk(const std::tuple ¶m, + AttrTypeSubElementWalker &walker) { + std::apply( + [&](auto &&...params) { + (AttrTypeSubElementHandler::walk(params, walker), ...); + }, + param); + } + static auto replace(const std::tuple ¶m, + AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return std::apply( + [&](const Ts &...params) + -> std::tuple::replace( + params, attrRepls, typeRepls))...> { + return {AttrTypeSubElementHandler::replace(params, attrRepls, + typeRepls)...}; + }, + param); + } +}; + +namespace detail { +template +struct is_tuple : public std::false_type {}; +template +struct is_tuple> : public std::true_type {}; + +/// This function provides the underlying implementation for the +/// SubElementInterface walk method, using the key type of the derived +/// attribute/type to interact with the individual parameters. +template +void walkImmediateSubElementsImpl(T derived, + function_ref walkAttrsFn, + function_ref walkTypesFn) { + auto key = static_cast(derived.getImpl())->getAsKey(); + + // If we don't have any sub-elements, there is nothing to do. + if constexpr (!has_sub_attr_or_type_v) { + return; + } else { + AttrTypeSubElementWalker walker(walkAttrsFn, walkTypesFn); + AttrTypeSubElementHandler::walk(key, walker); + } +} + +/// This function provides the underlying implementation for the +/// SubElementInterface replace method, using the key type of the derived +/// attribute/type to interact with the individual parameters. 
+template +T replaceImmediateSubElementsImpl(T derived, ArrayRef &replAttrs, + ArrayRef &replTypes) { + auto key = static_cast(derived.getImpl())->getAsKey(); + + // If we don't have any sub-elements, we can just return the original. + if constexpr (!has_sub_attr_or_type_v) { + return derived; + + // Otherwise, we need to replace any necessary sub-elements. + } else { + AttrSubElementReplacements attrRepls(replAttrs); + TypeSubElementReplacements typeRepls(replTypes); + auto newKey = AttrTypeSubElementHandler::replace( + key, attrRepls, typeRepls); + if constexpr (is_tuple::value) { + return std::apply( + [&](auto &&...params) { + return T::Base::get(derived.getContext(), + std::forward(params)...); + }, + newKey); + } else { + return T::Base::get(derived.getContext(), newKey); + } + } +} +} // namespace detail } // namespace mlir /// Include the definitions of the sub elemnt interfaces. diff --git a/mlir/include/mlir/IR/SubElementInterfaces.td b/mlir/include/mlir/IR/SubElementInterfaces.td index 3718b38238c23..abb5afcc93aa1 100644 --- a/mlir/include/mlir/IR/SubElementInterfaces.td +++ b/mlir/include/mlir/IR/SubElementInterfaces.td @@ -32,7 +32,11 @@ class SubElementInterfaceBase":$walkAttrsFn, - "llvm::function_ref":$walkTypesFn) + "llvm::function_ref":$walkTypesFn), + /*methodBody=*/[{}], /*defaultImplementation=*/[{ + ::mlir::detail::walkImmediateSubElementsImpl( + }] # derivedValue # [{, walkAttrsFn, walkTypesFn); + }] >, InterfaceMethod< /*desc=*/[{ @@ -47,10 +51,13 @@ class SubElementInterfaceBase":$replAttrs, - "::llvm::ArrayRef<::mlir::Type>":$replTypes - )>, + }], attrOrType, "replaceImmediateSubElements", + (ins "::llvm::ArrayRef<::mlir::Attribute>":$replAttrs, + "::llvm::ArrayRef<::mlir::Type>":$replTypes), + /*methodBody=*/[{}], /*defaultImplementation=*/[{ + return ::mlir::detail::replaceImmediateSubElementsImpl( + }] # derivedValue # [{, replAttrs, replTypes); + }]>, ]; code extraClassDeclaration = [{ @@ -154,6 +161,9 @@ def SubElementAttrInterface 
let description = [{ An interface used to query and manipulate sub-elements, such as sub-types and sub-attributes of a composite attribute. + + To support the introspection of custom parameters that hold sub-elements, + a specialization of the `AttrTypeSubElementHandler` class must be provided. }]; } @@ -168,6 +178,9 @@ def SubElementTypeInterface let description = [{ An interface used to query and manipulate sub-elements, such as sub-types and sub-attributes of a composite type. + + To support the introspection of custom parameters that hold sub-elements, + a specialization of the `AttrTypeSubElementHandler` class must be provided. }]; } diff --git a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h index 5bbab1f994ece..7f65707e98726 100644 --- a/mlir/include/mlir/IR/TypeRange.h +++ b/mlir/include/mlir/IR/TypeRange.h @@ -165,6 +165,23 @@ inline bool operator==(ArrayRef lhs, const ValueTypeRange &rhs) { std::equal(lhs.begin(), lhs.end(), rhs.begin()); } +//===----------------------------------------------------------------------===// +// SubElementInterfaces +//===----------------------------------------------------------------------===// + +/// Enable TypeRange to be introspected for sub-elements. 
+template <> +struct AttrTypeSubElementHandler { + static void walk(TypeRange param, AttrTypeSubElementWalker &walker) { + walker.walkRange(param); + } + static TypeRange replace(TypeRange param, + AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return typeRepls.take_front(param.size()); + } +}; + } // namespace mlir namespace llvm { diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp index 7d0c2297736b2..3927e8169c0f4 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp @@ -64,169 +64,6 @@ bool DITypeAttr::classof(Attribute attr) { return llvm::isa(attr); } -//===----------------------------------------------------------------------===// -// DICompileUnitAttr -//===----------------------------------------------------------------------===// - -void DICompileUnitAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getFile()); - walkAttrsFn(getProducer()); -} - -Attribute -DICompileUnitAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), getSourceLanguage(), replAttrs[0].cast(), - replAttrs[1].cast(), getIsOptimized(), - getEmissionKind()); -} - -//===----------------------------------------------------------------------===// -// DICompositeTypeAttr -//===----------------------------------------------------------------------===// - -void DICompositeTypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getName()); - walkAttrsFn(getFile()); - walkAttrsFn(getScope()); - for (DINodeAttr element : getElements()) - walkAttrsFn(element); -} - -Attribute DICompositeTypeAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - ArrayRef elements = replAttrs.drop_front(3); - return get( - getContext(), getTag(), replAttrs[0].cast(), - 
cast_or_null(replAttrs[1]), getLine(), - cast_or_null(replAttrs[2]), getSizeInBits(), - getAlignInBits(), - ArrayRef(static_cast(elements.data()), - elements.size())); -} - -//===----------------------------------------------------------------------===// -// DIDerivedTypeAttr -//===----------------------------------------------------------------------===// - -void DIDerivedTypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getName()); - walkAttrsFn(getBaseType()); -} - -Attribute -DIDerivedTypeAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), getTag(), replAttrs[0].cast(), - replAttrs[1].cast(), getSizeInBits(), getAlignInBits(), - getOffsetInBits()); -} - -//===----------------------------------------------------------------------===// -// DILexicalBlockAttr -//===----------------------------------------------------------------------===// - -void DILexicalBlockAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getScope()); - walkAttrsFn(getFile()); -} - -Attribute DILexicalBlockAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replAttrs[0].cast(), replAttrs[1].cast(), - getLine(), getColumn()); -} - -//===----------------------------------------------------------------------===// -// DILexicalBlockFileAttr -//===----------------------------------------------------------------------===// - -void DILexicalBlockFileAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getScope()); - walkAttrsFn(getFile()); -} - -Attribute DILexicalBlockFileAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replAttrs[0].cast(), replAttrs[1].cast(), - getDescriminator()); -} - -//===----------------------------------------------------------------------===// 
-// DILocalVariableAttr -//===----------------------------------------------------------------------===// - -void DILocalVariableAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getScope()); - walkAttrsFn(getName()); - walkAttrsFn(getFile()); - walkAttrsFn(getType()); -} - -Attribute DILocalVariableAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(getContext(), replAttrs[0].cast(), - replAttrs[1].cast(), replAttrs[2].cast(), - getLine(), getArg(), getAlignInBits(), - replAttrs[3].cast()); -} - -//===----------------------------------------------------------------------===// -// DISubprogramAttr -//===----------------------------------------------------------------------===// - -void DISubprogramAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getCompileUnit()); - walkAttrsFn(getScope()); - walkAttrsFn(getName()); - walkAttrsFn(getLinkageName()); - walkAttrsFn(getFile()); - walkAttrsFn(getType()); -} - -Attribute -DISubprogramAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), replAttrs[0].cast(), - replAttrs[1].cast(), replAttrs[2].cast(), - replAttrs[3].cast(), replAttrs[4].cast(), - getLine(), getScopeLine(), getSubprogramFlags(), - replAttrs[5].cast()); -} - -//===----------------------------------------------------------------------===// -// DISubroutineTypeAttr -//===----------------------------------------------------------------------===// - -void DISubroutineTypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (DITypeAttr type : getTypes()) - walkAttrsFn(type); -} - -Attribute DISubroutineTypeAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get( - getContext(), getCallingConvention(), - ArrayRef(static_cast(replAttrs.data()), - 
replAttrs.size())); -} - //===----------------------------------------------------------------------===// // LoopOptionsAttrBuilder //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index 99fa193185a61..133fc6036931e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -183,20 +183,6 @@ LLVMArrayType::getPreferredAlignment(const DataLayout &dataLayout, return dataLayout.getTypePreferredAlignment(getElementType()); } -//===----------------------------------------------------------------------===// -// SubElementTypeInterface - -void LLVMArrayType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMArrayType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front(), getNumElements()); -} - //===----------------------------------------------------------------------===// // Function type. 
//===----------------------------------------------------------------------===// @@ -247,22 +233,6 @@ LLVMFunctionType::verify(function_ref emitError, return success(); } -//===----------------------------------------------------------------------===// -// SubElementTypeInterface - -void LLVMFunctionType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getReturnType()); - for (Type type : getParams()) - walkTypesFn(type); -} - -Type LLVMFunctionType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front(), replTypes.drop_front(), isVarArg()); -} - //===----------------------------------------------------------------------===// // LLVMPointerType //===----------------------------------------------------------------------===// @@ -439,20 +409,6 @@ LogicalResult LLVMPointerType::verifyEntries(DataLayoutEntryListRef entries, return success(); } -//===----------------------------------------------------------------------===// -// SubElementTypeInterface - -void LLVMPointerType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMPointerType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(getContext(), replTypes.front(), getAddressSpace()); -} - //===----------------------------------------------------------------------===// // Struct type. 
//===----------------------------------------------------------------------===// @@ -749,17 +705,6 @@ LLVMFixedVectorType::verify(function_ref emitError, emitError, elementType, numElements); } -void LLVMFixedVectorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMFixedVectorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes[0], getNumElements()); -} - //===----------------------------------------------------------------------===// // LLVMScalableVectorType. //===----------------------------------------------------------------------===// @@ -792,17 +737,6 @@ LLVMScalableVectorType::verify(function_ref emitError, emitError, elementType, numElements); } -void LLVMScalableVectorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMScalableVectorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes[0], getMinNumElements()); -} - //===----------------------------------------------------------------------===// // Utility functions. 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/BuiltinAttributes.cpp b/mlir/lib/IR/BuiltinAttributes.cpp index ed22134d1dcc8..8a3c162f59423 100644 --- a/mlir/lib/IR/BuiltinAttributes.cpp +++ b/mlir/lib/IR/BuiltinAttributes.cpp @@ -43,23 +43,6 @@ void BuiltinDialect::registerAttributes() { >(); } -//===----------------------------------------------------------------------===// -// ArrayAttr -//===----------------------------------------------------------------------===// - -void ArrayAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Attribute attr : getValue()) - walkAttrsFn(attr); -} - -Attribute -ArrayAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), replAttrs); -} - //===----------------------------------------------------------------------===// // DictionaryAttr //===----------------------------------------------------------------------===// @@ -217,25 +200,6 @@ DictionaryAttr DictionaryAttr::getEmptyUnchecked(MLIRContext *context) { return Base::get(context, ArrayRef()); } -void DictionaryAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (const NamedAttribute &attr : getValue()) - walkAttrsFn(attr.getValue()); -} - -Attribute -DictionaryAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - std::vector vec = getValue().vec(); - for (auto &it : llvm::enumerate(replAttrs)) - vec[it.index()].setValue(it.value()); - - // The above only modifies the mapped value, but not the key, and therefore - // not the order of the elements. 
It remains sorted - return getWithSorted(getContext(), vec); -} - //===----------------------------------------------------------------------===// // StridedLayoutAttr //===----------------------------------------------------------------------===// @@ -375,24 +339,6 @@ StringAttr SymbolRefAttr::getLeafReference() const { return nestedRefs.empty() ? getRootReference() : nestedRefs.back().getAttr(); } -void SymbolRefAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getRootReference()); - for (FlatSymbolRefAttr ref : getNestedReferences()) - walkAttrsFn(ref); -} - -Attribute -SymbolRefAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - ArrayRef rawNestedRefs = replAttrs.drop_front(); - ArrayRef nestedRefs( - static_cast(rawNestedRefs.data()), - rawNestedRefs.size()); - return get(replAttrs[0].cast(), nestedRefs); -} - //===----------------------------------------------------------------------===// // IntegerAttr //===----------------------------------------------------------------------===// @@ -1812,22 +1758,6 @@ SparseElementsAttr::verify(function_ref emitError, return success(); } -//===----------------------------------------------------------------------===// -// TypeAttr -//===----------------------------------------------------------------------===// - -void TypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getValue()); -} - -Attribute -TypeAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(replTypes[0]); -} - //===----------------------------------------------------------------------===// // Attribute Utilities //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp index fe6d6ac3b2c4d..d65c5e9d28b1e 100644 --- a/mlir/lib/IR/BuiltinTypes.cpp +++ 
b/mlir/lib/IR/BuiltinTypes.cpp @@ -187,20 +187,6 @@ FunctionType::getWithoutArgsAndResults(const BitVector &argIndices, return clone(newArgTypes, newResultTypes); } -void FunctionType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Type type : llvm::concat(getInputs(), getResults())) - walkTypesFn(type); -} - -Type FunctionType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - unsigned numInputs = getNumInputs(); - return get(getContext(), replTypes.take_front(numInputs), - replTypes.drop_front(numInputs)); -} - //===----------------------------------------------------------------------===// // OpaqueType //===----------------------------------------------------------------------===// @@ -258,17 +244,6 @@ VectorType VectorType::scaleElementBitwidth(unsigned scale) { return VectorType(); } -void VectorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type VectorType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getShape(), replTypes.front(), getNumScalableDims()); -} - VectorType VectorType::cloneWith(Optional> shape, Type elementType) const { return VectorType::get(shape.value_or(getShape()), elementType, @@ -343,20 +318,6 @@ RankedTensorType::verify(function_ref emitError, return checkTensorElementType(emitError, elementType); } -void RankedTensorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); - if (Attribute encoding = getEncoding()) - walkAttrsFn(encoding); -} - -Type RankedTensorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(getShape(), replTypes.front(), - replAttrs.empty() ? 
Attribute() : replAttrs.back()); -} - //===----------------------------------------------------------------------===// // UnrankedTensorType //===----------------------------------------------------------------------===// @@ -367,17 +328,6 @@ UnrankedTensorType::verify(function_ref emitError, return checkTensorElementType(emitError, elementType); } -void UnrankedTensorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type UnrankedTensorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front()); -} - //===----------------------------------------------------------------------===// // BaseMemRefType //===----------------------------------------------------------------------===// @@ -671,24 +621,6 @@ LogicalResult MemRefType::verify(function_ref emitError, return success(); } -void MemRefType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); - if (!getLayout().isIdentity()) - walkAttrsFn(getLayout()); - walkAttrsFn(getMemorySpace()); -} - -Type MemRefType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - bool hasLayout = replAttrs.size() > 1; - return get(getShape(), replTypes[0], - hasLayout ? replAttrs[0].dyn_cast() - : MemRefLayoutAttrInterface(), - hasLayout ? 
replAttrs[1] : replAttrs[0]); -} - //===----------------------------------------------------------------------===// // UnrankedMemRefType //===----------------------------------------------------------------------===// @@ -870,18 +802,6 @@ LogicalResult mlir::getStridesAndOffset(MemRefType t, return success(); } -void UnrankedMemRefType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); - walkAttrsFn(getMemorySpace()); -} - -Type UnrankedMemRefType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front(), replAttrs.front()); -} - //===----------------------------------------------------------------------===// /// TupleType //===----------------------------------------------------------------------===// @@ -905,18 +825,6 @@ void TupleType::getFlattenedTypes(SmallVectorImpl &types) { /// Return the number of element types. size_t TupleType::size() const { return getImpl()->size(); } -void TupleType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Type type : getTypes()) - walkTypesFn(type); -} - -Type TupleType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), replTypes); -} - //===----------------------------------------------------------------------===// // Type Utilities //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/Location.cpp b/mlir/lib/IR/Location.cpp index 8a8801daa1160..dcbf9dcecfe29 100644 --- a/mlir/lib/IR/Location.cpp +++ b/mlir/lib/IR/Location.cpp @@ -80,20 +80,6 @@ CallSiteLoc CallSiteLoc::get(Location name, ArrayRef frames) { return CallSiteLoc::get(name, caller); } -void CallSiteLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getCallee()); - walkAttrsFn(getCaller()); -} - -Attribute 
-CallSiteLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(replAttrs[0].cast(), - replAttrs[1].cast()); -} - //===----------------------------------------------------------------------===// // FusedLoc //===----------------------------------------------------------------------===// @@ -135,55 +121,3 @@ Location FusedLoc::get(ArrayRef locs, Attribute metadata, return Base::get(context, locs, metadata); } - -void FusedLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Attribute attr : getLocations()) - walkAttrsFn(attr); - walkAttrsFn(getMetadata()); -} - -Attribute -FusedLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - SmallVector newLocs; - newLocs.reserve(replAttrs.size() - 1); - for (Attribute attr : replAttrs.drop_back()) - newLocs.push_back(attr.cast()); - return get(getContext(), newLocs, replAttrs.back()); -} - -//===----------------------------------------------------------------------===// -// NameLoc -//===----------------------------------------------------------------------===// - -void NameLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getName()); - walkAttrsFn(getChildLoc()); -} - -Attribute NameLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(replAttrs[0].cast(), - replAttrs[1].cast()); -} - -//===----------------------------------------------------------------------===// -// OpaqueLoc -//===----------------------------------------------------------------------===// - -void OpaqueLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getFallbackLocation()); -} - -Attribute -OpaqueLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getUnderlyingLocation(), getUnderlyingTypeID(), - replAttrs[0].cast()); -} diff --git 
a/mlir/lib/IR/SubElementInterfaces.cpp b/mlir/lib/IR/SubElementInterfaces.cpp index fd05b9d01eea4..ae0223f0936ef 100644 --- a/mlir/lib/IR/SubElementInterfaces.cpp +++ b/mlir/lib/IR/SubElementInterfaces.cpp @@ -27,11 +27,6 @@ static void walkSubElementsImpl(InterfaceT interface, DenseSet &visitedTypes) { interface.walkImmediateSubElements( [&](Attribute attr) { - // Guard against potentially null inputs. This removes the need for the - // derived attribute/type to do it. - if (!attr) - return; - // Avoid infinite recursion when visiting sub attributes later, if this // is a mutable attribute. if (LLVM_UNLIKELY(attr.hasTrait())) { @@ -48,11 +43,6 @@ static void walkSubElementsImpl(InterfaceT interface, walkAttrsFn(attr); }, [&](Type type) { - // Guard against potentially null inputs. This removes the need for the - // derived attribute/type to do it. - if (!type) - return; - // Avoid infinite recursion when visiting sub types later, if this // is a mutable type. if (LLVM_UNLIKELY(type.hasTrait())) { @@ -103,10 +93,6 @@ static void updateSubElementImpl( return; newElements.push_back(element); - // Guard against potentially null inputs. We always map null to null. - if (!element) - return; - // Check for an existing mapping for this element, and walk it if we haven't // yet. T *mappedElement = &visited[element]; diff --git a/mlir/lib/IR/TypeDetail.h b/mlir/lib/IR/TypeDetail.h index 1ae66555715f7..9dc8e6380c795 100644 --- a/mlir/lib/IR/TypeDetail.h +++ b/mlir/lib/IR/TypeDetail.h @@ -47,6 +47,8 @@ struct IntegerTypeStorage : public TypeStorage { IntegerTypeStorage(key.first, key.second); } + KeyTy getAsKey() const { return KeyTy(width, signedness); } + unsigned width : 30; IntegerType::SignednessSemantics signedness : 2; }; @@ -59,7 +61,7 @@ struct FunctionTypeStorage : public TypeStorage { inputsAndResults(inputsAndResults) {} /// The hash key used for uniquing. 
- using KeyTy = std::pair; + using KeyTy = std::tuple; bool operator==(const KeyTy &key) const { if (std::get<0>(key) == getInputs()) return std::get<1>(key) == getResults(); @@ -69,7 +71,7 @@ struct FunctionTypeStorage : public TypeStorage { /// Construction. static FunctionTypeStorage *construct(TypeStorageAllocator &allocator, const KeyTy &key) { - TypeRange inputs = key.first, results = key.second; + auto [inputs, results] = key; // Copy the inputs and results into the bump pointer. SmallVector types; @@ -90,6 +92,8 @@ struct FunctionTypeStorage : public TypeStorage { return ArrayRef(inputsAndResults + numInputs, numResults); } + KeyTy getAsKey() const { return KeyTy(getInputs(), getResults()); } + unsigned numInputs; unsigned numResults; Type const *inputsAndResults; @@ -127,6 +131,8 @@ struct TupleTypeStorage final return {getTrailingObjects(), size()}; } + KeyTy getAsKey() const { return getTypes(); } + /// The number of tuple elements. unsigned numElements; }; diff --git a/mlir/test/Dialect/Affine/loop-tiling.mlir b/mlir/test/Dialect/Affine/loop-tiling.mlir index b84ffe10867a9..e6c33fd9292fb 100644 --- a/mlir/test/Dialect/Affine/loop-tiling.mlir +++ b/mlir/test/Dialect/Affine/loop-tiling.mlir @@ -133,8 +133,8 @@ func.func @tile_with_symbolic_loop_upper_bounds(%arg0: memref, %arg1: m // CHECK: memref.dim %{{.*}}, %c0 : memref // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} step 32 { -// CHECK-NEXT: affine.for %{{.*}} = #map(%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { -// CHECK-NEXT: affine.for %{{.*}} = #map(%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { +// CHECK-NEXT: affine.for %{{.*}} = #[[$MAP:.*]](%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { +// CHECK-NEXT: affine.for %{{.*}} = #[[$MAP]](%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK-NEXT: affine.load diff 
--git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index c96f95a1b517a..4ff1f19fe36b5 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -775,9 +775,9 @@ func.func @input_stays_same(%arg0 : memref>, %arg1 return %shape : memref } -// CHECK: #[[MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, 0, d2)> -// CHECK: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> ()> -// CHECK: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, 0, d2)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> ()> +// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK: func @input_stays_same( // CHECK-SAME: %[[ARG0:.*]]: memref>, // CHECK-SAME: %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref) diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir index 393f0f49e15f7..ded7374d3ed82 100644 --- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir +++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir @@ -416,7 +416,7 @@ func.func @fold_static_stride_subview_with_affine_load_store_expand_shape_when_a // CHECK-NEXT: affine.for %[[ARG6:.*]] = 0 to 1 { // CHECK-NEXT: %[[TMP1:.*]] = affine.apply #[[$MAP0]](%[[ARG3]], %[[ARG4]], %[[ARG5]], %[[ARG6]]) // CHECK-NEXT: %[[TMP2:.*]] = affine.apply #[[$MAP1]](%[[ARG3]], %[[TMP1]]) -// CHECK-NEXT: %[[TMP3:.*]] = affine.apply #map2(%[[ARG5]], %[[ARG6]]) +// CHECK-NEXT: %[[TMP3:.*]] = affine.apply #{{.*}}(%[[ARG5]], %[[ARG6]]) // CHECK-NEXT: affine.load %[[ARG0]][%[[TMP2]], %[[TMP3]]] : memref<1024x1024xf32> // ----- diff --git a/mlir/test/Dialect/SCF/for-loop-specialization.mlir b/mlir/test/Dialect/SCF/for-loop-specialization.mlir index 40e8d7dfe4571..ff66c6c1e47f5 100644 --- a/mlir/test/Dialect/SCF/for-loop-specialization.mlir +++ 
b/mlir/test/Dialect/SCF/for-loop-specialization.mlir @@ -23,7 +23,7 @@ func.func @for(%outer: index, %A: memref, %B: memref, // CHECK: [[CST_0:%.*]] = arith.constant 0 : index // CHECK: [[CST_1:%.*]] = arith.constant 1 : index // CHECK: [[DIM_0:%.*]] = memref.dim [[ARG1]], [[CST_0]] : memref -// CHECK: [[MIN:%.*]] = affine.min #map(){{\[}}[[DIM_0]], [[ARG0]]] +// CHECK: [[MIN:%.*]] = affine.min #{{.*}}(){{\[}}[[DIM_0]], [[ARG0]]] // CHECK: [[CST_1024:%.*]] = arith.constant 1024 : index // CHECK: [[PRED:%.*]] = arith.cmpi eq, [[MIN]], [[CST_1024]] : index // CHECK: scf.if [[PRED]] { diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir index f03254405bfee..73c823ca8d55e 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir @@ -26,8 +26,8 @@ func.func @parallel_loop(%outer_i0: index, %outer_i1: index, %A: memref // CHECK: [[VAL_7:%.*]] = arith.constant 1 : index // CHECK: [[VAL_8:%.*]] = memref.dim [[VAL_2]], [[VAL_6]] : memref // CHECK: [[VAL_9:%.*]] = memref.dim [[VAL_2]], [[VAL_7]] : memref -// CHECK: [[VAL_10:%.*]] = affine.min #map(){{\[}}[[VAL_8]], [[VAL_0]]] -// CHECK: [[VAL_11:%.*]] = affine.min #map1(){{\[}}[[VAL_9]], [[VAL_1]]] +// CHECK: [[VAL_10:%.*]] = affine.min #{{.*}}(){{\[}}[[VAL_8]], [[VAL_0]]] +// CHECK: [[VAL_11:%.*]] = affine.min #{{.*}}(){{\[}}[[VAL_9]], [[VAL_1]]] // CHECK: [[VAL_12:%.*]] = arith.constant 1024 : index // CHECK: [[VAL_13:%.*]] = arith.cmpi eq, [[VAL_10]], [[VAL_12]] : index // CHECK: [[VAL_14:%.*]] = arith.constant 64 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir index 897f60b29fdbd..41b0d85b3752e 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir @@ -13,7 +13,7 @@ func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, return } -// 
CHECK: #map = affine_map<(d0, d1, d2) -> (d0, d1 - d2)> +// CHECK: #[[$MAP:.*]] = affine_map<(d0, d1, d2) -> (d0, d1 - d2)> // CHECK-LABEL: func @parallel_loop( // CHECK-SAME: [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: index, [[ARG6:%.*]]: index, [[ARG7:%.*]]: memref, [[ARG8:%.*]]: memref, [[ARG9:%.*]]: memref, [[ARG10:%.*]]: memref) { // CHECK: [[C0:%.*]] = arith.constant 0 : index @@ -22,8 +22,8 @@ func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, // CHECK: [[V1:%.*]] = arith.muli [[ARG5]], [[C1]] : index // CHECK: [[V2:%.*]] = arith.muli [[ARG6]], [[C4]] : index // CHECK: scf.parallel ([[V3:%.*]], [[V4:%.*]]) = ([[ARG1]], [[ARG2]]) to ([[ARG3]], [[ARG4]]) step ([[V1]], [[V2]]) { -// CHECK: [[V5:%.*]] = affine.min #map([[V1]], [[ARG3]], [[V3]]) -// CHECK: [[V6:%.*]] = affine.min #map([[V2]], [[ARG4]], [[V4]]) +// CHECK: [[V5:%.*]] = affine.min #[[$MAP]]([[V1]], [[ARG3]], [[V3]]) +// CHECK: [[V6:%.*]] = affine.min #[[$MAP]]([[V2]], [[ARG4]], [[V4]]) // CHECK: scf.parallel ([[V7:%.*]], [[V8:%.*]]) = ([[C0]], [[C0]]) to ([[V5]], [[V6]]) step ([[ARG5]], [[ARG6]]) { // CHECK: [[V9:%.*]] = arith.addi [[V7]], [[V3]] : index // CHECK: [[V10:%.*]] = arith.addi [[V8]], [[V4]] : index @@ -91,7 +91,7 @@ func.func @tile_nested_innermost() { // CHECK: [[V3:%.*]] = arith.muli [[C1]], [[C1_1]] : index // CHECK: [[V4:%.*]] = arith.muli [[C1]], [[C4]] : index // CHECK: scf.parallel ([[V5:%.*]], [[V6:%.*]]) = ([[C0]], [[C0]]) to ([[C2]], [[C2]]) step ([[V3]], [[V4]]) { -// CHECK: [[V7:%.*]] = affine.min #map([[V4]], [[C2]], [[V6]]) +// CHECK: [[V7:%.*]] = affine.min #{{.*}}([[V4]], [[C2]], [[V6]]) // CHECK: scf.parallel ([[V8:%.*]], [[V9:%.*]]) = ([[C0_1]], [[C0_1]]) to ([[V3]], [[V7]]) step ([[C1]], [[C1]]) { // CHECK: = arith.addi [[V8]], [[V5]] : index // CHECK: = arith.addi [[V9]], [[V6]] : index @@ -104,7 +104,7 @@ func.func @tile_nested_innermost() { // CHECK: [[V10:%.*]] = arith.muli [[C1]], [[C1_2]] 
: index // CHECK: [[V11:%.*]] = arith.muli [[C1]], [[C4_1]] : index // CHECK: scf.parallel ([[V12:%.*]], [[V13:%.*]]) = ([[C0]], [[C0]]) to ([[C2]], [[C2]]) step ([[V10]], [[V11]]) { -// CHECK: [[V14:%.*]] = affine.min #map([[V11]], [[C2]], [[V13]]) +// CHECK: [[V14:%.*]] = affine.min #{{.*}}([[V11]], [[C2]], [[V13]]) // CHECK: scf.parallel ([[V15:%.*]], [[V16:%.*]]) = ([[C0_2]], [[C0_2]]) to ([[V10]], [[V14]]) step ([[C1]], [[C1]]) { // CHECK: = arith.addi [[V15]], [[V12]] : index // CHECK: = arith.addi [[V16]], [[V13]] : index diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index 628ce3b4535a5..7f850ccbbc4e2 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s +// RUN: mlir-opt %s -split-input-file | mlir-opt -split-input-file | FileCheck %s #SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}> diff --git a/mlir/test/IR/affine-map.mlir b/mlir/test/IR/affine-map.mlir index 56a105630f9d2..977aec2536b1e 100644 --- a/mlir/test/IR/affine-map.mlir +++ b/mlir/test/IR/affine-map.mlir @@ -1,10 +1,7 @@ // RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s // Identity maps used in trivial compositions in MemRefs are optimized away. -// CHECK-NOT: #map{{[0-9]*}} = affine_map<(d0, d1) -> (d0, d1)> #map0 = affine_map<(i, j) -> (i, j)> - -// CHECK-NOT: #map{{[0-9]*}} = affine_map<(d0, d1)[s0] -> (d0, d1)> #map1 = affine_map<(i, j)[s0] -> (i, j)> // CHECK: #map{{[0-9]*}} = affine_map<() -> (0)> @@ -194,7 +191,6 @@ // Check if parser can parse affine_map with identifiers that collide with // integer types. 
-// CHECK: #map{{[0-9]*}} = affine_map<(d0, d1) -> (d0, d1)> #map60 = affine_map<(i0, i1) -> (i0, i1)> // Check if parser can parse affine_map with identifiers that collide with diff --git a/mlir/test/IR/memory-ops.mlir b/mlir/test/IR/memory-ops.mlir index fbbf36d6bc210..c1cfc3bfa0dbf 100644 --- a/mlir/test/IR/memory-ops.mlir +++ b/mlir/test/IR/memory-ops.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s | FileCheck %s -// CHECK: #map = affine_map<(d0, d1)[s0] -> (d0 + s0, d1)> +// CHECK: #[[$MAP:.*]] = affine_map<(d0, d1)[s0] -> (d0 + s0, d1)> // CHECK-LABEL: func @alloc() { func.func @alloc() { @@ -17,11 +17,11 @@ func.func @alloc() { %1 = memref.alloc(%c0, %c1) : memref (d0, d1)>, 1> // Test alloc with no dynamic dimensions and one symbol. - // CHECK: %{{.*}} = memref.alloc()[%{{.*}}] : memref<2x4xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloc()[%{{.*}}] : memref<2x4xf32, #[[$MAP]], 1> %2 = memref.alloc()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> // Test alloc with dynamic dimensions and one symbol. - // CHECK: %{{.*}} = memref.alloc(%{{.*}})[%{{.*}}] : memref<2x?xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloc(%{{.*}})[%{{.*}}] : memref<2x?xf32, #[[$MAP]], 1> %3 = memref.alloc(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1> // Alloc with no mappings. @@ -48,11 +48,11 @@ func.func @alloca() { %1 = memref.alloca(%c0, %c1) : memref (d0, d1)>, 1> // Test alloca with no dynamic dimensions and one symbol. - // CHECK: %{{.*}} = memref.alloca()[%{{.*}}] : memref<2x4xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloca()[%{{.*}}] : memref<2x4xf32, #[[$MAP]], 1> %2 = memref.alloca()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> // Test alloca with dynamic dimensions and one symbol. 
- // CHECK: %{{.*}} = memref.alloca(%{{.*}})[%{{.*}}] : memref<2x?xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloca(%{{.*}})[%{{.*}}] : memref<2x?xf32, #[[$MAP]], 1> %3 = memref.alloca(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1> // Alloca with no mappings, but with alignment. diff --git a/mlir/test/Transforms/loop-fusion-2.mlir b/mlir/test/Transforms/loop-fusion-2.mlir index 729e1dc2d9e80..c1fded7a16bb9 100644 --- a/mlir/test/Transforms/loop-fusion-2.mlir +++ b/mlir/test/Transforms/loop-fusion-2.mlir @@ -508,16 +508,16 @@ func.func @fuse_across_dim_mismatch(%arg0: memref<4x4x16x1xf32>, %arg1: memref<1 } return } -// MAXIMAL: #map = affine_map<(d0, d1) -> (d0 * 16 + d1)> +// MAXIMAL: #[[$MAP:.*]] = affine_map<(d0, d1) -> (d0 * 16 + d1)> // MAXIMAL-LABEL: func @fuse_across_dim_mismatch // MAXIMAL: memref.alloc() : memref<1x1xf32> // MAXIMAL: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 4 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 16 { -// MAXIMAL-NEXT: affine.apply #map(%{{.*}}, %{{.*}}) +// MAXIMAL-NEXT: affine.apply #[[$MAP]](%{{.*}}, %{{.*}}) // MAXIMAL-NEXT: affine.store %{{.*}}, %{{.*}}[0, 0] : memref<1x1xf32> -// MAXIMAL-NEXT: affine.apply #map(%{{.*}}, %{{.*}}) +// MAXIMAL-NEXT: affine.apply #[[$MAP]](%{{.*}}, %{{.*}}) // MAXIMAL-NEXT: affine.load %{{.*}}[0, 0] : memref<1x1xf32> // MAXIMAL-NEXT: } // MAXIMAL-NEXT: } diff --git a/mlir/test/Transforms/normalize-memrefs-ops.mlir b/mlir/test/Transforms/normalize-memrefs-ops.mlir index b45b62a92e4a6..34420c50a51ab 100644 --- a/mlir/test/Transforms/normalize-memrefs-ops.mlir +++ b/mlir/test/Transforms/normalize-memrefs-ops.mlir @@ -29,15 +29,15 @@ func.func @test_norm(%arg0 : memref<1x16x14x14xf32, #map0>) -> () { // Same test with op_nonnorm, with maps in the arguments and the operations in the function. 
// CHECK-LABEL: test_nonnorm -// CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x14x14xf32, #map>) +// CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x14x14xf32, #[[MAP:.*]]>) func.func @test_nonnorm(%arg0 : memref<1x16x14x14xf32, #map0>) -> () { %0 = memref.alloc() : memref<1x16x14x14xf32, #map0> "test.op_nonnorm"(%arg0, %0) : (memref<1x16x14x14xf32, #map0>, memref<1x16x14x14xf32, #map0>) -> () memref.dealloc %0 : memref<1x16x14x14xf32, #map0> - // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x14x14xf32, #map> - // CHECK: "test.op_nonnorm"(%[[ARG0]], %[[v0]]) : (memref<1x16x14x14xf32, #map>, memref<1x16x14x14xf32, #map>) -> () - // CHECK: memref.dealloc %[[v0]] : memref<1x16x14x14xf32, #map> + // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x14x14xf32, #[[MAP]]> + // CHECK: "test.op_nonnorm"(%[[ARG0]], %[[v0]]) : (memref<1x16x14x14xf32, #[[MAP]]>, memref<1x16x14x14xf32, #[[MAP]]>) -> () + // CHECK: memref.dealloc %[[v0]] : memref<1x16x14x14xf32, #[[MAP]]> return } diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td index 07cfca121f62d..0c35f81c129b0 100644 --- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td +++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td @@ -119,8 +119,7 @@ def TestI64ElementsAttr : Test_Attr<"TestI64Elements", [ } def TestSubElementsAccessAttr : Test_Attr<"TestSubElementsAccess", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let mnemonic = "sub_elements_access"; diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index 28fde0987ac09..4c7639b3ae252 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -150,20 +150,6 @@ void TestSubElementsAccessAttr::print(::mlir::AsmPrinter &printer) const { << ">"; } -void TestSubElementsAccessAttr::walkImmediateSubElements( - llvm::function_ref walkAttrsFn, - llvm::function_ref walkTypesFn) const { - walkAttrsFn(getFirst()); - 
walkAttrsFn(getSecond()); - walkAttrsFn(getThird()); -} - -Attribute TestSubElementsAccessAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - assert(replAttrs.size() == 3 && "invalid number of replacement attributes"); - return get(getContext(), replAttrs[0], replAttrs[1], replAttrs[2]); -} - //===----------------------------------------------------------------------===// // TestExtern1DI64ElementsAttr //===----------------------------------------------------------------------===// diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index ec0e79ebdd834..f34aaa364fac9 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -457,6 +457,13 @@ void DefGen::emitKeyType() { [&](auto ¶m) { os << param.getCppType(); }); os << '>'; storageCls->declare("KeyTy", std::move(os.str())); + + // Add a method to construct the key type from the storage. + Method *m = storageCls->addConstMethod("KeyTy", "getAsKey"); + m->body().indent() << "return KeyTy("; + llvm::interleaveComma(params, m->body().indent(), + [&](auto ¶m) { m->body() << param.getName(); }); + m->body() << ");"; } void DefGen::emitEquals() { diff --git a/mlir/unittests/IR/SubElementInterfaceTest.cpp b/mlir/unittests/IR/SubElementInterfaceTest.cpp index 292628aad5d47..66e29d48f7f47 100644 --- a/mlir/unittests/IR/SubElementInterfaceTest.cpp +++ b/mlir/unittests/IR/SubElementInterfaceTest.cpp @@ -23,13 +23,14 @@ TEST(SubElementInterfaceTest, Nested) { BoolAttr trueAttr = builder.getBoolAttr(true); BoolAttr falseAttr = builder.getBoolAttr(false); ArrayAttr boolArrayAttr = builder.getArrayAttr({trueAttr, falseAttr}); + StringAttr strAttr = builder.getStringAttr("array"); DictionaryAttr dictAttr = - builder.getDictionaryAttr(builder.getNamedAttr("array", boolArrayAttr)); + builder.getDictionaryAttr(builder.getNamedAttr(strAttr, boolArrayAttr)); SmallVector subAttrs; 
dictAttr.walkSubAttrs([&](Attribute attr) { subAttrs.push_back(attr); }); EXPECT_EQ(llvm::makeArrayRef(subAttrs), - ArrayRef({trueAttr, falseAttr, boolArrayAttr})); + ArrayRef({strAttr, trueAttr, falseAttr, boolArrayAttr})); } } // namespace From e83446f43b23cf4a4274cb3bde09883dcd9f7b4d Mon Sep 17 00:00:00 2001 From: River Riddle Date: Thu, 3 Nov 2022 19:12:53 -0700 Subject: [PATCH 305/516] [mlir:LLVM] Fix translation of DISubRangeAttr to LLVM We were missing DISubRangeAttr in the type switch. This also updates a few of the range parameters to be optional. --- mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td | 6 +++--- mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp | 6 ++++-- mlir/lib/Target/LLVMIR/DebugTranslation.cpp | 2 +- mlir/test/Target/LLVMIR/llvmir-debug.mlir | 12 ++++++++++-- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 0c689d0019f45..182704defcc64 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -334,9 +334,9 @@ def LLVM_DISubrangeAttr : LLVM_Attr<"DISubrange", "di_subrange", /*traits=*/[], "DINodeAttr"> { let parameters = (ins "IntegerAttr":$count, - "IntegerAttr":$lowerBound, - "IntegerAttr":$upperBound, - "IntegerAttr":$stride + OptionalParameter<"IntegerAttr">:$lowerBound, + OptionalParameter<"IntegerAttr">:$upperBound, + OptionalParameter<"IntegerAttr">:$stride ); let assemblyFormat = "`<` struct(params) `>`"; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp index 3927e8169c0f4..74cce2a9ae40a 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp @@ -44,7 +44,8 @@ bool DINodeAttr::classof(Attribute attr) { return llvm::isa(attr); + DISubprogramAttr, DISubrangeAttr, DISubroutineTypeAttr>( + attr); } 
//===----------------------------------------------------------------------===// @@ -61,7 +62,8 @@ bool DIScopeAttr::classof(Attribute attr) { //===----------------------------------------------------------------------===// bool DITypeAttr::classof(Attribute attr) { - return llvm::isa(attr); + return llvm::isa(attr); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index 6de8febdd56a8..3dbb3f719e5b3 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -216,7 +216,7 @@ llvm::DINode *DebugTranslation::translate(DINodeAttr attr) { .Case( + DISubrangeAttr, DISubroutineTypeAttr>( [&](auto attr) { return translateImpl(attr); }); attrToNode.insert({attr, node}); return node; diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir index fd95e176e1941..72f8f746b9769 100644 --- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir @@ -27,7 +27,12 @@ llvm.func @func_no_debug() { sourceLanguage = DW_LANG_C, file = #file, producer = "MLIR", isOptimized = true, emissionKind = Full > -#spType = #llvm.di_subroutine_type +#composite = #llvm.di_composite_type< + tag = DW_TAG_structure_type, name = "composite", file = #file, + line = 0, sizeInBits = 0, alignInBits = 0, + elements = #llvm.di_subrange +> +#spType = #llvm.di_subroutine_type #sp = #llvm.di_subprogram< compileUnit = #cu, scope = #file, name = "intrinsics", linkageName = "intrinsics", file = #file, line = 3, scopeLine = 3, subprogramFlags = "Definition|Optimized", type = #spType @@ -69,8 +74,11 @@ llvm.func @func_with_debug(%arg: i64) { // CHECK: ![[FUNC_LOC]] = distinct !DISubprogram(name: "intrinsics", linkageName: "intrinsics", scope: ![[CU_FILE_LOC]], file: ![[CU_FILE_LOC]], line: 3, type: ![[FUNC_TYPE:.*]], scopeLine: 3, spFlags: DISPFlagDefinition 
| DISPFlagOptimized, unit: ![[CU_LOC]]) // CHECK: ![[FUNC_TYPE]] = !DISubroutineType(cc: DW_CC_normal, types: ![[ARG_TYPES:.*]]) -// CHECK: ![[ARG_TYPES]] = !{![[ARG_TYPE:.*]]} +// CHECK: ![[ARG_TYPES]] = !{![[ARG_TYPE:.*]], ![[COMPOSITE_TYPE:.*]]} // CHECK: ![[ARG_TYPE]] = !DIBasicType(name: "si64", encoding: DW_ATE_signed) +// CHECK: ![[COMPOSITE_TYPE]] = !DICompositeType(tag: DW_TAG_structure_type, name: "composite", file: ![[CU_FILE_LOC]], elements: ![[COMPOSITE_ELEMENTS:.*]]) +// CHECK: ![[COMPOSITE_ELEMENTS]] = !{![[COMPOSITE_ELEMENT:.*]]} +// CHECK: ![[COMPOSITE_ELEMENT]] = !DISubrange(count: 4) // CHECK: ![[VAR_LOC]] = !DILocalVariable(name: "arg", arg: 1, scope: ![[VAR_SCOPE:.*]], file: ![[CU_FILE_LOC]], line: 6, type: ![[ARG_TYPE]]) // CHECK: ![[VAR_SCOPE]] = distinct !DILexicalBlockFile(scope: ![[FUNC_LOC]], file: ![[CU_FILE_LOC]], discriminator: 0) From 55e899813ff2774c0cd9dbaf4da5c666500ba6f4 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Fri, 4 Nov 2022 18:53:50 -0700 Subject: [PATCH 306/516] [mlir] Attempt to fix windows build after D137374 --- mlir/include/mlir/IR/SubElementInterfaces.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/IR/SubElementInterfaces.h b/mlir/include/mlir/IR/SubElementInterfaces.h index 0f3045de5f86a..ed387eb9a122e 100644 --- a/mlir/include/mlir/IR/SubElementInterfaces.h +++ b/mlir/include/mlir/IR/SubElementInterfaces.h @@ -196,7 +196,7 @@ struct AttrTypeSubElementHandler< static void walk(const std::tuple ¶m, AttrTypeSubElementWalker &walker) { std::apply( - [&](auto &&...params) { + [&](const Ts &...params) { (AttrTypeSubElementHandler::walk(params, walker), ...); }, param); From 6c6dff7e2c27c5a9ea9466d49f61a1edc82bc364 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Fri, 4 Nov 2022 14:22:35 -0400 Subject: [PATCH 307/516] [libc] Add add_with_carry to builtin wrapper. Add add_with_carry to builtin wrapper to be used by UInt class. 
Reviewed By: orex Differential Revision: https://reviews.llvm.org/D137453 --- libc/src/__support/CMakeLists.txt | 2 + libc/src/__support/builtin_wrappers.h | 56 +++++++++++++++++++ .../llvm-project-overlay/libc/BUILD.bazel | 1 + 3 files changed, 59 insertions(+) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 83495359d7068..aebc9b4a2f89f 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -10,6 +10,8 @@ add_header_library( builtin_wrappers HDRS builtin_wrappers.h + DEPENDS + libc.src.__support.CPP.type_traits ) add_header_library( diff --git a/libc/src/__support/builtin_wrappers.h b/libc/src/__support/builtin_wrappers.h index d30feb92f97b9..108100f0880ec 100644 --- a/libc/src/__support/builtin_wrappers.h +++ b/libc/src/__support/builtin_wrappers.h @@ -10,6 +10,8 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_BUILTIN_WRAPPERS_H #define LLVM_LIBC_SRC_SUPPORT_BUILTIN_WRAPPERS_H +#include "src/__support/CPP/type_traits.h" + namespace __llvm_libc { // The following overloads are matched based on what is accepted by @@ -64,6 +66,60 @@ template static inline int unsafe_clz(T val) { return __internal::clz(val); } +// Add with carry +template +inline constexpr cpp::enable_if_t< + cpp::is_integral_v && cpp::is_unsigned_v, T> +add_with_carry(T a, T b, T carry_in, T &carry_out) { + T tmp = a + carry_in; + T sum = b + tmp; + carry_out = (sum < b) || (tmp < a); + return sum; +} + +#if __has_builtin(__builtin_addc) +// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins + +template <> +inline unsigned char add_with_carry(unsigned char a, + unsigned char b, + unsigned char carry_in, + unsigned char &carry_out) { + return __builtin_addcb(a, b, carry_in, &carry_out); +} + +template <> +inline unsigned short +add_with_carry(unsigned short a, unsigned short b, + unsigned short carry_in, + unsigned short &carry_out) { + return __builtin_addcs(a, b, carry_in, &carry_out); +} + +template 
<> +inline unsigned int add_with_carry(unsigned int a, unsigned int b, + unsigned int carry_in, + unsigned int &carry_out) { + return __builtin_addc(a, b, carry_in, &carry_out); +} + +template <> +inline unsigned long add_with_carry(unsigned long a, + unsigned long b, + unsigned long carry_in, + unsigned long &carry_out) { + return __builtin_addcl(a, b, carry_in, &carry_out); +} + +template <> +inline unsigned long long +add_with_carry(unsigned long long a, unsigned long long b, + unsigned long long carry_in, + unsigned long long &carry_out) { + return __builtin_addcll(a, b, carry_in, &carry_out); +} +#endif // __has_builtin(__builtin_addc) + } // namespace __llvm_libc #endif // LLVM_LIBC_SRC_SUPPORT_BUILTIN_WRAPPERS_H diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 2a81792db347c..abcb4cd136520 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -183,6 +183,7 @@ cc_library( name = "__support_builtin_wrappers", hdrs = ["src/__support/builtin_wrappers.h"], deps = [ + ":__support_cpp_type_traits", ":libc_root", ], ) From 670329036189040edb6c21e4fd1d98c0c979a9e2 Mon Sep 17 00:00:00 2001 From: "chenglin.bi" Date: Sat, 5 Nov 2022 12:58:14 +0800 Subject: [PATCH 308/516] [InstCombine] fold `sub + and` pattern with specific const value `C1 - ((C3 - X) & C2) --> (X & C2) + (C1 - (C2 & C3))` when: (C3 - ((C2 & C3) - 1)) is pow2 && ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1) && C2 is negative pow2 || (C3 - X) is nuw https://alive2.llvm.org/ce/z/HXQJV- Fix: #58523 Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D136582 --- .../InstCombine/InstCombineAddSub.cpp | 34 +++++++++++++++---- llvm/test/Transforms/InstCombine/sub.ll | 21 +++++------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp 
index 76234ccfdcff3..6f1b0b9b070bb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2032,12 +2032,34 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { } const APInt *Op0C; - if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) { - // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known - // zero. - KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); - if ((*Op0C | RHSKnown.Zero).isAllOnes()) - return BinaryOperator::CreateXor(Op1, Op0); + if (match(Op0, m_APInt(Op0C))) { + if (Op0C->isMask()) { + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. + KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnes()) + return BinaryOperator::CreateXor(Op1, Op0); + } + + // C - ((C3 -nuw X) & C2) --> (C - (C2 & C3)) + (X & C2) when: + // (C3 - ((C2 & C3) - 1)) is pow2 + // ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1) + // C2 is negative pow2 || sub nuw + const APInt *C2, *C3; + BinaryOperator *InnerSub; + if (match(Op1, m_OneUse(m_And(m_BinOp(InnerSub), m_APInt(C2)))) && + match(InnerSub, m_Sub(m_APInt(C3), m_Value(X))) && + (InnerSub->hasNoUnsignedWrap() || C2->isNegatedPowerOf2())) { + APInt C2AndC3 = *C2 & *C3; + APInt C2AndC3Minus1 = C2AndC3 - 1; + APInt C2AddC3 = *C2 + *C3; + if ((*C3 - C2AndC3Minus1).isPowerOf2() && + C2AndC3Minus1.isSubsetOf(C2AddC3)) { + Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(), *C2)); + return BinaryOperator::CreateAdd( + And, ConstantInt::get(I.getType(), *Op0C - C2AndC3)); + } + } } { diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 4f5349c1b5c09..69fe99fe8521a 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -2229,16 +2229,14 @@ define i8 @demand_sub_from_variable_lowbits3(i8 %x, i8 %y) { ret i8 %r } -; TODO: ; C - ((C3 - X) & C2) --> (C - (C2 & 
C3)) + (X & C2) when: -; (C3 - (C2 & C3) + 1) is pow2 +; (C3 - ((C2 & C3) - 1)) is pow2 ; ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1) ; C2 is negative pow2 define i10 @sub_to_and_nuw(i10 %x) { ; CHECK-LABEL: @sub_to_and_nuw( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw i10 71, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i10 [[SUB]], 120 -; CHECK-NEXT: [[R:%.*]] = sub nuw nsw i10 443, [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = and i10 [[X:%.*]], 120 +; CHECK-NEXT: [[R:%.*]] = add nuw nsw i10 [[TMP1]], 379 ; CHECK-NEXT: ret i10 [[R]] ; %sub = sub nuw i10 71, %x @@ -2247,15 +2245,13 @@ define i10 @sub_to_and_nuw(i10 %x) { ret i10 %r } -; TODO: ; C - ((C3 -nuw X) & C2) --> (C - (C2 & C3)) + (X & C2) when: -; (C3 - (C2 & C3) + 1) is pow2 +; (C3 - ((C2 & C3) - 1)) is pow2 ; ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1) define i10 @sub_to_and_negpow2(i10 %x) { ; CHECK-LABEL: @sub_to_and_negpow2( -; CHECK-NEXT: [[SUB:%.*]] = sub i10 71, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i10 [[SUB]], -8 -; CHECK-NEXT: [[R:%.*]] = sub i10 33, [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = and i10 [[X:%.*]], -8 +; CHECK-NEXT: [[R:%.*]] = add i10 [[TMP1]], -31 ; CHECK-NEXT: ret i10 [[R]] ; %sub = sub i10 71, %x @@ -2342,9 +2338,8 @@ define i10 @sub_to_and_negative4(i10 %x) { define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector1( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x From 3e8c1c4fc48a286dd546899a7374dce680633d93 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 4 Nov 2022 14:49:52 -0700 Subject: [PATCH 309/516] [lldb/crashlog] Fix frame parser regex for when there is no source info It can happen that the originator of a crash report doesn't have access to certain 
images. When that's the case, ReportCrash won't show the source info in the crash report stack frames, but only the stack address and image name. This patch fixes a bug in the crashlog stackframe parser regular expression to optionally match the source info group. rdar://101934135 Differential Revision: https://reviews.llvm.org/D137466 Signed-off-by: Med Ismail Bennani --- lldb/examples/python/crashlog.py | 2 +- .../Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 47250f3b350f1..c3a450ccb14bd 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -645,7 +645,7 @@ class TextCrashLogParser(CrashLogParser): r'(.+?)\s+' # img_name r'(?:' +version+ r'\s+)?' # img_version r'(0x[0-9a-fA-F]{4,})' # addr (4 chars or more) - r' +(.*)' # offs + r'(?: +(.*))?' # offs ) null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +') image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash index 16a95586a13b4..4361ed5020028 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash @@ -29,7 +29,7 @@ Terminating Process: exc handler [21606] Thread 0 Crashed:: Dispatch queue: com.apple.main-thread 0 a.out @foo@ foo + 16 (test.c:3) -1 a.out @bar@ bar + 9 (test.c:6) +1 a.out @bar@ 2 a.out @main@ main + 20 (test.c:8) 3 libdyld.dylib 0x1000000 start + 1 From 40afc69e6dc0cc39e9e090ed96df95d6334db17c Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 4 Nov 2022 21:57:31 -0700 Subject: [PATCH 310/516] [lldb/test] Fix StructuredDataTest::GetDescriptionEmpty warning (NFC) Signed-off-by: Med Ismail Bennani --- lldb/unittests/Utility/StructuredDataTest.cpp 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/unittests/Utility/StructuredDataTest.cpp b/lldb/unittests/Utility/StructuredDataTest.cpp index e732016fe43db..e536039f365a4 100644 --- a/lldb/unittests/Utility/StructuredDataTest.cpp +++ b/lldb/unittests/Utility/StructuredDataTest.cpp @@ -38,7 +38,7 @@ TEST(StructuredDataTest, GetDescriptionEmpty) { StreamString S; object_sp->GetDescription(S); - EXPECT_EQ(0, S.GetSize()); + EXPECT_EQ(0u, S.GetSize()); } TEST(StructuredDataTest, GetDescriptionBasic) { From acba66fdde0ea918d4b7bd16863ac1bba7d1521b Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Fri, 4 Nov 2022 21:58:51 -0700 Subject: [PATCH 311/516] [lldb/crashlog] Standardize file path key in the ScriptedProcess Dictionary This patch replaces the backing file path key to "file_path" to keep it consistent. rdar://101652618 Signed-off-by: Med Ismail Bennani --- lldb/examples/python/crashlog.py | 2 +- .../python/scripted_process/crashlog_scripted_process.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index c3a450ccb14bd..b62ebd758f22c 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -1104,7 +1104,7 @@ def load_crashlog_in_scripted_process(debugger, crash_log_file, options, result) raise InteractiveCrashLogException("couldn't import crashlog scripted process module") structured_data = lldb.SBStructuredData() - structured_data.SetFromJSON(json.dumps({ "crashlog_path" : crashlog_path, + structured_data.SetFromJSON(json.dumps({ "file_path" : crashlog_path, "load_all_images": options.load_all_images })) launch_info = lldb.SBLaunchInfo(None) launch_info.SetProcessPluginName("ScriptedProcess") diff --git a/lldb/examples/python/scripted_process/crashlog_scripted_process.py b/lldb/examples/python/scripted_process/crashlog_scripted_process.py index 55c50917c9d67..7ed5cc930da72 100644 --- 
a/lldb/examples/python/scripted_process/crashlog_scripted_process.py +++ b/lldb/examples/python/scripted_process/crashlog_scripted_process.py @@ -71,7 +71,7 @@ def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData): self.crashlog_path = None - crashlog_path = args.GetValueForKey("crashlog_path") + crashlog_path = args.GetValueForKey("file_path") if crashlog_path and crashlog_path.IsValid(): if crashlog_path.GetType() == lldb.eStructuredDataTypeString: self.crashlog_path = crashlog_path.GetStringValue(4096) From 2951a9323f853b4e5fec6b07405a2a0dd1162eeb Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Sat, 5 Nov 2022 11:08:21 +0530 Subject: [PATCH 312/516] MLIR. NFC. Remove trailing whitespaces in MemRefOps.td Remove trailing whitespaces in MemRefOps.td. NFC. --- .../mlir/Dialect/MemRef/IR/MemRefOps.td | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 1f1b118087f90..cfc9d2a773087 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -483,7 +483,7 @@ def MemRef_CastOp : MemRef_Op<"cast", [ // CopyOp //===----------------------------------------------------------------------===// -def CopyOp : MemRef_Op<"copy", [CopyOpInterface, SameOperandsElementType, +def CopyOp : MemRef_Op<"copy", [CopyOpInterface, SameOperandsElementType, SameOperandsShape]> { let description = [{ @@ -809,10 +809,10 @@ def MemRef_DmaWaitOp : MemRef_Op<"dma_wait"> { // ExtractAlignedPointerAsIndexOp //===----------------------------------------------------------------------===// -def MemRef_ExtractAlignedPointerAsIndexOp : +def MemRef_ExtractAlignedPointerAsIndexOp : MemRef_Op<"extract_aligned_pointer_as_index", [ DeclareOpInterfaceMethods, - Pure, + Pure, SameVariadicResultSize]> { let summary = "Extracts a memref's underlying aligned pointer as an index"; let 
description = [{ @@ -852,7 +852,7 @@ def MemRef_ExtractAlignedPointerAsIndexOp : def MemRef_ExtractStridedMetadataOp : MemRef_Op<"extract_strided_metadata", [ DeclareOpInterfaceMethods, - Pure, + Pure, SameVariadicResultSize, DeclareOpInterfaceMethods]> { let summary = "Extracts a buffer base with offset and strides"; @@ -866,18 +866,18 @@ def MemRef_ExtractStridedMetadataOp : MemRef_Op<"extract_strided_metadata", [ This operation is also useful for completeness to the existing memref.dim op. While accessing strides, offsets and the base pointer independently is not - available, this is useful for composing with its natural complement op: + available, this is useful for composing with its natural complement op: `memref.reinterpret_cast`. Intended Use Cases: The main use case is to expose the logic for manipulate memref metadata at a - higher level than the LLVM dialect. + higher level than the LLVM dialect. This makes lowering more progressive and brings the following benefits: - not all users of MLIR want to lower to LLVM and the information to e.g. lower to library calls---like libxsmm---or to SPIR-V was not available. - - foldings and canonicalizations can happen at a higher level in MLIR: - before this op existed, lowering to LLVM would create large amounts of + - foldings and canonicalizations can happen at a higher level in MLIR: + before this op existed, lowering to LLVM would create large amounts of LLVMIR. Even when LLVM does a good job at folding the low-level IR from a performance perspective, it is unnecessarily opaque and inefficient to send unkempt IR to LLVM. 
@@ -885,11 +885,11 @@ def MemRef_ExtractStridedMetadataOp : MemRef_Op<"extract_strided_metadata", [ Example: ```mlir - %base, %offset, %sizes:2, %strides:2 = - memref.extract_strided_metadata %memref : + %base, %offset, %sizes:2, %strides:2 = + memref.extract_strided_metadata %memref : memref<10x?xf32>, index, index, index, index, index - // After folding, the type of %m2 can be memref<10x?xf32> and further + // After folding, the type of %m2 can be memref<10x?xf32> and further // folded to %memref. %m2 = memref.reinterpret_cast %base to offset: [%offset], @@ -1213,10 +1213,10 @@ def MemRef_PrefetchOp : MemRef_Op<"prefetch"> { def MemRef_ReinterpretCastOp : MemRef_OpWithOffsetSizesAndStrides<"reinterpret_cast", [ DeclareOpInterfaceMethods, - AttrSizedOperandSegments, + AttrSizedOperandSegments, MemRefsNormalizable, - Pure, - OffsetSizeAndStrideOpInterface, + Pure, + OffsetSizeAndStrideOpInterface, ViewLikeOpInterface ]> { let summary = "memref reinterpret cast operation"; @@ -2008,7 +2008,7 @@ def MemRef_TransposeOp : MemRef_Op<"transpose", [ def MemRef_ViewOp : MemRef_Op<"view", [ DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, Pure]> { let summary = "memref view operation"; let description = [{ From 1bb6f1bb8f40181977516ec5b81a7bec25d312a2 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 4 Nov 2022 12:53:24 -0700 Subject: [PATCH 313/516] [libc][docs] Move links to internal developer guides out of the main side bar. The links are all now from a "Developer Guides" page which is linked from the main side bar. 
--- libc/docs/api_test.rst | 5 ++++- libc/docs/clang_tidy_checks.rst | 2 +- ...build_system.rst => cmake_build_rules.rst} | 7 ++++-- libc/docs/code_style.rst | 22 +++++++++++++++++++ libc/docs/contributing.rst | 2 +- libc/docs/developer_guides.rst | 21 ++++++++++++++++++ libc/docs/index.rst | 11 +--------- libc/docs/overlay_mode.rst | 8 +++---- ...urce_layout.rst => source_tree_layout.rst} | 3 +++ 9 files changed, 62 insertions(+), 19 deletions(-) rename libc/docs/{build_system.rst => cmake_build_rules.rst} (91%) create mode 100644 libc/docs/code_style.rst create mode 100644 libc/docs/developer_guides.rst rename libc/docs/{source_layout.rst => source_tree_layout.rst} (98%) diff --git a/libc/docs/api_test.rst b/libc/docs/api_test.rst index b63adb3e64574..e39d506c3a92f 100644 --- a/libc/docs/api_test.rst +++ b/libc/docs/api_test.rst @@ -1,5 +1,8 @@ +.. _api_test: + +======== API Test -===================== +======== The implementation of libc-project is unique because our public C header files are generated using information from ground truth captured in TableGen files. Unit tests only exercise the internal C++ implementations and don't ensure the diff --git a/libc/docs/clang_tidy_checks.rst b/libc/docs/clang_tidy_checks.rst index b0e72cd011d4b..67ab5fc65c902 100644 --- a/libc/docs/clang_tidy_checks.rst +++ b/libc/docs/clang_tidy_checks.rst @@ -1,4 +1,4 @@ -.. _clangtidy_rules: +.. _clang_tidy_checks: LLVM libc clang-tidy checks =========================== diff --git a/libc/docs/build_system.rst b/libc/docs/cmake_build_rules.rst similarity index 91% rename from libc/docs/build_system.rst rename to libc/docs/cmake_build_rules.rst index b55f92a2e7bfc..dfa9f7a6d7d41 100644 --- a/libc/docs/build_system.rst +++ b/libc/docs/cmake_build_rules.rst @@ -1,5 +1,8 @@ -LLVM libc build rules -===================== +.. 
_cmake_build_rules: + +=========================== +The libc CMake build system +=========================== At the cost of verbosity, we want to keep the build system of LLVM libc as simple as possible. We also want to be highly modular with our build diff --git a/libc/docs/code_style.rst b/libc/docs/code_style.rst new file mode 100644 index 0000000000000..9efacc4bbece3 --- /dev/null +++ b/libc/docs/code_style.rst @@ -0,0 +1,22 @@ +.. _code_style: + +=================== +The libc code style +=================== + +For the most part, the libc project follows the general `coding standards of +the LLVM project <https://llvm.org/docs/CodingStandards.html>`_. The libc +project differs from that standard with respect to the naming style. The +differences are as follows: + +#. **Non-const variables** - This includes function arguments, struct and + class data members, non-const globals and local variables. They all use the + ``snake_case`` style. +#. **const and constexpr variables** - They use the capitalized + ``SNAKE_CASE`` irrespective of whether they are local or global. +#. **Functions and methods** - They use the ``snake_case`` style like the + non-const variables. +#. **Internal type names** - These are types which are internal to the libc + implementation. They use the `CapitalizedCamelCase` style. +#. **Public names** - These are the names as prescribed by the standards and + will follow the style as prescribed by the standards. diff --git a/libc/docs/contributing.rst b/libc/docs/contributing.rst index e17243df7bc69..65ba9a4079704 100644 --- a/libc/docs/contributing.rst +++ b/libc/docs/contributing.rst @@ -42,7 +42,7 @@ a list of open projects that one can start with: to a fast random number generator with a large range. #. **Update the clang-tidy lint rules and use them in the build and/or CI** - - Currently, the :ref:`clangtidy_rules` have gone stale and are mostly unused + Currently, the :ref:`clang_tidy_checks` have gone stale and are mostly unused by the developers and on the CI builders. 
This project is about updating them and reintegrating them back with the build and running them on the CI builders. diff --git a/libc/docs/developer_guides.rst b/libc/docs/developer_guides.rst new file mode 100644 index 0000000000000..e7e05e1cc0348 --- /dev/null +++ b/libc/docs/developer_guides.rst @@ -0,0 +1,21 @@ +.. _developer_guides: + +================ +Developer Guides +================ + +Navigate to the links below for information on the respective topics: + +.. toctree:: + + code_style + source_tree_layout + entrypoints + cmake_build_rules + clang_tidy_checks + fuzzing + ground_truth_specification + header_generation + implementation_standard + api_test + mechanics_of_public_api diff --git a/libc/docs/index.rst b/libc/docs/index.rst index c72544cdcc99b..e99ebafe274ef 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -68,16 +68,7 @@ stages there is no ABI stability in any form. :maxdepth: 1 :caption: Development - build_system - clang_tidy_checks - entrypoints - fuzzing - ground_truth_specification - header_generation - implementation_standard - api_test - mechanics_of_public_api - source_layout + developer_guides porting contributing diff --git a/libc/docs/overlay_mode.rst b/libc/docs/overlay_mode.rst index 4c5eaf939487c..0039e67f6384e 100644 --- a/libc/docs/overlay_mode.rst +++ b/libc/docs/overlay_mode.rst @@ -59,14 +59,14 @@ can follow up the build step with an install step: $> ninja install-llvmlibc -Building the static archive as part of the runtimes build ---------------------------------------------------------- +Building the static archive as part of the bootstrap build +---------------------------------------------------------- -The runtimes build is a build mode in which runtime components like libc++, +The bootstrap build is a build mode in which runtime components like libc++, libcxx-abi, libc etc. are built using the ToT clang. The idea is that this build produces an in-sync toolchain of compiler + runtime libraries. 
Such a synchrony is not essential for the libc but can one still build the overlay static archive -as part of the runtimes build if one wants to. The first step is to configure +as part of the bootstrap build if one wants to. The first step is to configure appropriately: .. code-block:: sh diff --git a/libc/docs/source_layout.rst b/libc/docs/source_tree_layout.rst similarity index 98% rename from libc/docs/source_layout.rst rename to libc/docs/source_tree_layout.rst index 960a060d9b7b7..a0565cb713080 100644 --- a/libc/docs/source_layout.rst +++ b/libc/docs/source_tree_layout.rst @@ -1,3 +1,6 @@ +.. _source_tree_layout: + +============================ LLVM-libc Source Tree Layout ============================ From 64821f178ed719546a2620457334c23d737c58f2 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Sat, 5 Nov 2022 00:21:09 -0700 Subject: [PATCH 314/516] [libc][docs] Fix typos. --- libc/docs/date_and_time.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libc/docs/date_and_time.rst b/libc/docs/date_and_time.rst index 9439da26e0d21..cadeabcb49478 100644 --- a/libc/docs/date_and_time.rst +++ b/libc/docs/date_and_time.rst @@ -8,8 +8,7 @@ Date and Time Functions Source location --------------- -- The main source for string functions is located at: - ``libc/src/time`` +- The main source for time functions is located at: ``libc/src/time`` --------------------- Implementation Status From b12c67703346f5d9b6b3c45e9a237458b7fc03eb Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Sat, 5 Nov 2022 00:58:16 -0700 Subject: [PATCH 315/516] [libc][docs] Add a build and test document. 
--- libc/docs/build_and_test.rst | 43 ++++++++++++++++++++++++++++++++++++ libc/docs/index.rst | 1 + 2 files changed, 44 insertions(+) create mode 100644 libc/docs/build_and_test.rst diff --git a/libc/docs/build_and_test.rst b/libc/docs/build_and_test.rst new file mode 100644 index 0000000000000..423481ecc3a59 --- /dev/null +++ b/libc/docs/build_and_test.rst @@ -0,0 +1,43 @@ +.. _build_and_test: + +============================= +Building and Testing the libc +============================= + +The libc can be built and tested in two different modes: + +#. **The overlay mode** - In this mode, one uses the static archive from LLVM's + libc along with the system libc. See :ref:`overlay_mode` for more details + on building and using the libc in this mode. You can only run the libc + unittests in this mode. To run them, one simply does: + + .. code-block:: sh + + $> ninja check-libc + + Note that, unittests for only those functions which are part of the overlay + static archive will be run with the above command. + +#. **The full build mode** - In this mode, the libc is used as the only libc + for the user's application. See :ref:`fullbuild_mode` for more details on + building and using the libc in this mode. Once configured for a full libc + build, you can run three kinds of tests: + + #. Unit tests - You can run unittests by the command: + + .. code-block:: sh + + $> ninja check-libc + + #. Integration tests - You can run integration tests by the command: + + .. code-block:: sh + + $> ninja libc-integration-tests + + #. API verification test - See :ref:`api_test` for more information about + the API test. It can be run by the command: + + .. code-block:: sh + + $> ninja libc-api-test diff --git a/libc/docs/index.rst b/libc/docs/index.rst index e99ebafe274ef..c298f00c1e99b 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -68,6 +68,7 @@ stages there is no ABI stability in any form. 
:maxdepth: 1 :caption: Development + build_and_test developer_guides porting contributing From ed4749f9373d0079a69e947486aa29042d606458 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Fri, 4 Nov 2022 23:26:02 +0100 Subject: [PATCH 316/516] [mlir] Add `populateFunctionOpInterfaceTypeConversionPattern` version which operates on any `FunctionOpInterface` Existing version is always limited to some specific op. Differential Revision: https://reviews.llvm.org/D137469 --- .../mlir/Transforms/DialectConversion.h | 3 + .../Transforms/Utils/DialectConversion.cpp | 57 +++++++++++++------ mlir/test/lib/Dialect/Test/TestPatterns.cpp | 8 +-- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 061edb196f0fc..6045b2237976e 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -507,6 +507,9 @@ void populateFunctionOpInterfaceTypeConversionPattern( patterns, converter); } +void populateAnyFunctionOpInterfaceTypeConversionPattern( + RewritePatternSet &patterns, TypeConverter &converter); + //===----------------------------------------------------------------------===// // Conversion PatternRewriter //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 505127c459656..61bc4ffbe6f28 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -3056,6 +3056,29 @@ auto TypeConverter::convertBlockSignature(Block *block) // FunctionOpInterfaceSignatureConversion //===----------------------------------------------------------------------===// +static LogicalResult convertFuncOpTypes(FunctionOpInterface funcOp, + TypeConverter &typeConverter, + ConversionPatternRewriter &rewriter) { + FunctionType type = 
funcOp.getFunctionType().cast(); + + // Convert the original function types. + TypeConverter::SignatureConversion result(type.getNumInputs()); + SmallVector newResults; + if (failed(typeConverter.convertSignatureArgs(type.getInputs(), result)) || + failed(typeConverter.convertTypes(type.getResults(), newResults)) || + failed(rewriter.convertRegionTypes(&funcOp.getFunctionBody(), + typeConverter, &result))) + return failure(); + + // Update the function signature in-place. + auto newType = FunctionType::get(rewriter.getContext(), + result.getConvertedTypes(), newResults); + + rewriter.updateRootInPlace(funcOp, [&] { funcOp.setType(newType); }); + + return success(); +} + /// Create a default conversion pattern that rewrites the type signature of a /// FunctionOpInterface op. This only supports ops which use FunctionType to /// represent their type. @@ -3067,27 +3090,21 @@ struct FunctionOpInterfaceSignatureConversion : public ConversionPattern { : ConversionPattern(converter, functionLikeOpName, /*benefit=*/1, ctx) {} LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, + matchAndRewrite(Operation *op, ArrayRef /*operands*/, ConversionPatternRewriter &rewriter) const override { FunctionOpInterface funcOp = cast(op); - FunctionType type = funcOp.getFunctionType().cast(); - - // Convert the original function types. - TypeConverter::SignatureConversion result(type.getNumInputs()); - SmallVector newResults; - if (failed(typeConverter->convertSignatureArgs(type.getInputs(), result)) || - failed(typeConverter->convertTypes(type.getResults(), newResults)) || - failed(rewriter.convertRegionTypes(&funcOp.getFunctionBody(), - *typeConverter, &result))) - return failure(); - - // Update the function signature in-place. 
- auto newType = FunctionType::get(rewriter.getContext(), - result.getConvertedTypes(), newResults); + return convertFuncOpTypes(funcOp, *typeConverter, rewriter); + } +}; - rewriter.updateRootInPlace(op, [&] { funcOp.setType(newType); }); +struct AnyFunctionOpInterfaceSignatureConversion + : public OpInterfaceConversionPattern { + using OpInterfaceConversionPattern::OpInterfaceConversionPattern; - return success(); + LogicalResult + matchAndRewrite(FunctionOpInterface funcOp, ArrayRef /*operands*/, + ConversionPatternRewriter &rewriter) const override { + return convertFuncOpTypes(funcOp, *typeConverter, rewriter); } }; } // namespace @@ -3099,6 +3116,12 @@ void mlir::populateFunctionOpInterfaceTypeConversionPattern( functionLikeOpName, patterns.getContext(), converter); } +void mlir::populateAnyFunctionOpInterfaceTypeConversionPattern( + RewritePatternSet &patterns, TypeConverter &converter) { + patterns.add( + converter, patterns.getContext()); +} + //===----------------------------------------------------------------------===// // ConversionTarget //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 17c8c1f84d35d..12f374777936c 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -786,8 +786,8 @@ struct TestLegalizePatternDriver TestNestedOpCreationUndoRewrite, TestReplaceEraseOp, TestCreateUnregisteredOp>(&getContext()); patterns.add(&getContext(), converter); - mlir::populateFunctionOpInterfaceTypeConversionPattern( - patterns, converter); + mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, + converter); mlir::populateCallOpTypeConversionPattern(patterns, converter); // Define the conversion target used for the test. 
@@ -1313,8 +1313,8 @@ struct TestTypeConversionDriver TestTestSignatureConversionNoConverter>(converter, &getContext()); patterns.add(&getContext()); - mlir::populateFunctionOpInterfaceTypeConversionPattern( - patterns, converter); + mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, + converter); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) From d95dc5bce9dfea001423e22a37e150d0b782012d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 11:27:46 +0000 Subject: [PATCH 317/516] [X86] Replace unnecessary int2double overrides with a better WriteCvtI2PD def Broadwell, Haswell and SkylakeClient were completely overriding the WriteCvtI2PD defs - we can remove those overrides entirely by replacing the unused WriteCvtI2PD values There's plenty more of these in the scheduler models - I'm looking at improving warnings in llvm-tblgen to catch them all --- llvm/lib/Target/X86/X86SchedBroadwell.td | 20 +++--------- llvm/lib/Target/X86/X86SchedHaswell.td | 26 ++++----------- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 34 ++------------------ 3 files changed, 13 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 35a776941485a..3c6a6a5170d99 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -371,8 +371,8 @@ defm : BWWriteResPair; defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; @@ -877,10 +877,8 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDrr)>; def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr", "MMX_CVT(T?)PS2PIrr", - "(V?)CVTDQ2PDrr", "(V?)CVTPD2PSrr", 
"(V?)CVTSD2SSrr", "(V?)CVTSI642SDrr", @@ -1005,8 +1003,7 @@ def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup60], (instrs VCVTDQ2PDYrr, - VCVTPD2PSYrr, +def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2PSYrr, VCVTPD2DQYrr, VCVTTPD2DQYrr)>; @@ -1250,10 +1247,8 @@ def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { } def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm, CVTPD2DQrm, - CVTTPD2DQrm, - MMX_CVTPI2PDrm)>; + CVTTPD2DQrm)>; def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm", - "(V?)CVTDQ2PDrm", "(V?)CVTSD2SSrm")>; def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> { @@ -1315,13 +1310,6 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> { def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m")>; def: InstRW<[BWWriteResGroup123], (instrs VPCMPGTQYrm)>; -def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup128], (instrs VCVTDQ2PDYrm)>; - def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { let Latency = 11; let NumMicroOps = 7; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index d1d385bfaf49f..b759423a14b25 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -365,9 +365,9 @@ defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -1388,13 +1388,11 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup73], 
(instrs MMX_CVTPI2PDrr, - MMX_CVTPD2PIrr, +def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPD2PIrr, MMX_CVTPS2PIrr, MMX_CVTTPD2PIrr, MMX_CVTTPS2PIrr)>; -def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTDQ2PDrr", - "(V?)CVTPD2PSrr", +def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTPD2PSrr", "(V?)CVTSD2SSrr", "(V?)CVTSI(64)?2SDrr", "(V?)CVTSI2SSrr", @@ -1434,9 +1432,7 @@ def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm, CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPD2PIrm, - MMX_CVTTPD2PIrm, - CVTDQ2PDrm, - VCVTDQ2PDrm)>; + MMX_CVTTPD2PIrm)>; def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let Latency = 9; @@ -1552,8 +1548,7 @@ def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup102], (instrs VCVTDQ2PDYrr, - VCVTPD2PSYrr, +def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2PSYrr, VCVTPD2DQYrr, VCVTTPD2DQYrr)>; @@ -1564,13 +1559,6 @@ def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; -def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 12; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup104], (instrs VCVTDQ2PDYrm)>; - def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> { let Latency = 6; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ba245bb6358b5..ffc5d9730d63a 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -456,8 +456,8 @@ defm : SKLWriteResPair; defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; -defm : SKLWriteResPair; -defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; @@ -936,15 +936,6 @@ def SKLWriteResGroup58 : 
SchedWriteRes<[SKLPort23]> { def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", "MOVZX(16|32|64)rm(8|16)")>; -def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup59], (instrs MMX_CVTPI2PDrr, - CVTDQ2PDrr, - VCVTDQ2PDrr)>; - def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -1113,13 +1104,6 @@ def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128, VPBROADCASTDYrm, VPBROADCASTQYrm)>; -def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup86], (instrs VCVTDQ2PDYrr)>; - def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 6; let NumMicroOps = 2; @@ -1408,13 +1392,6 @@ def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup149], (instregex "FICOM(P?)(16|32)m")>; -def SKLWriteResGroup150 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup150], (instregex "(V?)CVTDQ2PDrm")>; - def SKLWriteResGroup151 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort01]> { let Latency = 11; let NumMicroOps = 3; @@ -1472,13 +1449,6 @@ def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup162], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; -def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 13; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup163], (instrs VCVTDQ2PDYrm)>; - def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 14; let NumMicroOps = 3; From 1bd6471c125d4de288753da0ff8737375503b796 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 11:19:36 +0000 
Subject: [PATCH 318/516] [X86] Remove SandyBridge CVTSS2SIrm/CVTSD2SIrm overrides Just use the default WriteCvtSS2I/WriteCvtSD2I folded variants (already used by the VCVTSS2SI/VCVTSD2SI AVX variants). Confirmed with agner and uops.info - there should be no difference between the SSE/AVX1 variants in folded load latency --- llvm/lib/Target/X86/X86SchedSandyBridge.td | 7 ------- llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s | 8 ++++---- 7 files changed, 24 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 0f90036eb38e3..b1dd52da3fa23 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -1002,13 +1002,6 @@ def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { } def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>; -def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>; - def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s index 39a99e8a12408..291b8cd43cb4e 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s @@ -218,14 +218,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, 
%ecx # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %rcx # CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s index 7a8d4b03a9356..904454a547077 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s @@ -440,8 +440,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %ecx, %xmm2 @@ -458,8 +458,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %rcx # CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2 # CHECK-NEXT: 2 28 
22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s index 39a99e8a12408..291b8cd43cb4e 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s @@ -218,14 +218,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %rcx # CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s index 7a8d4b03a9356..904454a547077 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s @@ -440,8 +440,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2 # 
CHECK-NEXT: 2 4 1.00 cvtsi2sd %ecx, %xmm2 @@ -458,8 +458,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %rcx # CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s index 28915f49790d9..02b5810cda417 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s @@ -218,14 +218,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %rcx # CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s index 2b6255c697fa1..c3b8b7389df4c 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s +++ 
b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s @@ -440,8 +440,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %ecx, %xmm2 @@ -458,8 +458,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %rcx # CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 From 184817d0df00af94168fc9121b42e5a3fd77d8d7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 11:31:53 +0000 Subject: [PATCH 319/516] [X86] Add missing ReadAfterFold attributes from CVTSD2SSrm/CVTSS2SDrm to match the AVX + _Int equivalents This was never added in D52886, probably because the defs were already missing the old ReadAfterLd attribute, but they do exhibit the same behaviour of reading the dst reg after the load. 
--- llvm/lib/Target/X86/X86InstrSSE.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index f90202e98a941..a31117e377ae2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1301,7 +1301,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>, XD, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; } let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in { @@ -1367,7 +1367,7 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>, XS, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC; + Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, SIMD_EXC; } // isCodeGenOnly = 1 let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, From ee1ad1a6d09935d804f1a7973c99b505017c0428 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 12:41:45 +0000 Subject: [PATCH 320/516] [X86] Add SchedWriteVecTruncate scheduler per-width wrapper Replaces hard coded uses of WriteVPMOV256 for all the vector truncations instructions We still need to work out how to fix folded stores (see Issue #36236) --- llvm/lib/Target/X86/X86InstrAVX512.td | 56 +++++++++++++-------------- llvm/lib/Target/X86/X86Schedule.td | 3 ++ 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 2b823114a0357..b8b214596cd94 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9926,7 +9926,7 @@ multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode128, 
SDPatternOperator MaskNode128, SDPatternOperator MaskNode256, SDPatternOperator MaskNode512, - X86FoldableSchedWrite sched, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, @@ -9935,25 +9935,25 @@ multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode128, PatFrag mtruncFrag, Predicate prd = HasAVX512>{ let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V128; - defm Z256: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V256; } let Predicates = [prd] in - defm Z: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V512; } multiclass avx512_trunc_qb opc, string OpcodeStr, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_dw opc, string 
OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, } defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", - WriteVPMOV256, truncstorevi8, + SchedWriteVecTruncate, truncstorevi8, masked_truncstorevi8, X86vtrunc, X86vmtrunc>; defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", - WriteVPMOV256, truncstore_s_vi8, + SchedWriteVecTruncate, truncstore_s_vi8, masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>; defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", - WriteVPMOV256, truncstore_us_vi8, + SchedWriteVecTruncate, truncstore_us_vi8, masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, - WriteVPMOV256, truncstorevi16, + SchedWriteVecTruncate, truncstorevi16, masked_truncstorevi16, X86vtrunc, X86vmtrunc>; defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi16, + SchedWriteVecTruncate, truncstore_s_vi16, masked_truncstore_s_vi16, X86vtruncs, X86vmtruncs>; defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi16, masked_truncstore_us_vi16, X86vtruncus, X86vmtruncus>; defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, - WriteVPMOV256, truncstorevi32, + SchedWriteVecTruncate, truncstorevi32, masked_truncstorevi32, X86vtrunc, X86vmtrunc>; defm VPMOVSQD : avx512_trunc_qd<0x25, 
"vpmovsqd", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi32, + SchedWriteVecTruncate, truncstore_s_vi32, masked_truncstore_s_vi32, X86vtruncs, X86vmtruncs>; defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi32, masked_truncstore_us_vi32, X86vtruncus, X86vmtruncus>; defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, - WriteVPMOV256, truncstorevi8, + SchedWriteVecTruncate, truncstorevi8, masked_truncstorevi8, X86vtrunc, X86vmtrunc>; defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi8, + SchedWriteVecTruncate, truncstore_s_vi8, masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>; defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi8, masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, - WriteVPMOV256, truncstorevi16, + SchedWriteVecTruncate, truncstorevi16, masked_truncstorevi16, X86vtrunc, X86vmtrunc>; defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi16, + SchedWriteVecTruncate, truncstore_s_vi16, masked_truncstore_s_vi16, X86vtruncs, X86vmtruncs>; defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi16, masked_truncstore_us_vi16, X86vtruncus, X86vmtruncus>; defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc, - WriteVPMOV256, truncstorevi8, + SchedWriteVecTruncate, truncstorevi8, masked_truncstorevi8, X86vtrunc, X86vmtrunc>; defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi8, + SchedWriteVecTruncate, truncstore_s_vi8, masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>; 
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi8, masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index b156396660805..3321ed737a444 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -681,6 +681,9 @@ def SchedWritePSADBW def SchedWriteVecExtend : X86SchedWriteWidths; +def SchedWriteVecTruncate + : X86SchedWriteWidths; def SchedWriteShuffle : X86SchedWriteWidths; From b781ca4df65bcdd72abc121e30d2fc743a25720c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 13:57:07 +0000 Subject: [PATCH 321/516] [X86] Fix override for CVTPD2PS/CVTPD2DQ/CVTTPD2DQ AVX variants These were lost when they were converted from instregex to instrs --- llvm/lib/Target/X86/X86SchedBroadwell.td | 6 +++--- llvm/lib/Target/X86/X86SchedHaswell.td | 6 +++--- .../tools/llvm-mca/X86/Broadwell/resources-avx1.s | 14 +++++++------- .../tools/llvm-mca/X86/Haswell/resources-avx1.s | 14 +++++++------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 3c6a6a5170d99..84b36f1a84c19 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -1245,9 +1245,9 @@ def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm, - CVTPD2DQrm, - CVTTPD2DQrm)>; +def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm, VCVTPD2PSrm, + CVTPD2DQrm, VCVTPD2DQrm, + CVTTPD2DQrm, VCVTTPD2DQrm)>; def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm", "(V?)CVTSD2SSrm")>; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b759423a14b25..4c646d47a90cc 100644 --- 
a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1428,9 +1428,9 @@ def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm, - CVTPD2DQrm, - CVTTPD2DQrm, +def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm, VCVTPD2PSrm, + CVTPD2DQrm, VCVTPD2DQrm, + CVTTPD2DQrm, VCVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index bc98992d516bc..f69fed1cd4aa1 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1123,11 +1123,11 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 2 9 1.00 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 @@ -1159,7 +1159,7 @@ vzeroupper # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 
257.00 215.25 235.25 176.17 176.17 38.00 426.25 2.25 12.67 +# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 429.25 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1833,11 +1833,11 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %xmm0, %xmm2 @@ -1869,7 +1869,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index 997ea6156a0ae..6146a207e49e1 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ 
-1123,11 +1123,11 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 @@ -1159,7 +1159,7 @@ vzeroupper # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 429.58 2.25 12.67 +# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 432.58 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1833,11 +1833,11 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - 
1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %xmm0, %xmm2 @@ -1869,7 +1869,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2 From 4c52a9879bfd978f31ed3c8b1e4e700b764bcf31 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Sat, 5 Nov 2022 15:10:40 +0100 Subject: [PATCH 322/516] [flang] Fix controlSuccessor chain for select type construct Represent the select type statement + type guard statement the same way the select case statement and case statement are represented. controlSuccessor was not correctly attributed to the next type guard stmt. 
Reviewed By: PeteSteinfeld, vdonaldson Differential Revision: https://reviews.llvm.org/D137460 --- flang/lib/Lower/PFTBuilder.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index 62ec5adf7758e..8f87e96c986ca 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -904,8 +904,13 @@ class PFTBuilder { [&](const parser::SelectRankCaseStmt &) { eval.isNewBlock = true; }, [&](const parser::SelectTypeStmt &s) { insertConstructName(s, parentConstruct); + lastConstructStmtEvaluation = &eval; + }, + [&](const parser::TypeGuardStmt &) { + eval.isNewBlock = true; + lastConstructStmtEvaluation->controlSuccessor = &eval; + lastConstructStmtEvaluation = &eval; }, - [&](const parser::TypeGuardStmt &) { eval.isNewBlock = true; }, // Constructs - set (unstructured) construct exit targets [&](const parser::AssociateConstruct &) { setConstructExit(eval); }, From 0b7f327800ab2d86dafd9033eff989b3193c8334 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 14:35:41 +0000 Subject: [PATCH 323/516] [X86] Fix cvtss2si64/cvttss2si64 typo in SkylakeClient SS2SI64 conversions use Port0/Port01/Port5 (with/without truncation), but SS2SI32 only uses Port0/Port01 like SD2SI32/SD2SI64 --- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 4 ++-- .../tools/llvm-mca/X86/SkylakeClient/resources-avx1.s | 8 ++++---- .../tools/llvm-mca/X86/SkylakeClient/resources-sse1.s | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ffc5d9730d63a..ef8a5efeb76cf 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -1023,7 +1023,7 @@ def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort01]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup70], (instregex 
"(V?)CVTSS2SI(64)?rr", +def: InstRW<[SKLWriteResGroup70], (instregex "(V?)CVT(T?)SS2SIrr", "(V?)CVT(T?)SD2SI(64)?rr")>; def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> { @@ -1160,7 +1160,7 @@ def SKLWriteResGroup95 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort01]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup95], (instregex "(V?)CVTTSS2SI(64)?rr")>; +def: InstRW<[SKLWriteResGroup95], (instregex "(V?)CVT(T?)SS2SI64?rr")>; def SKLWriteResGroup96 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> { let Latency = 7; diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s index eb70e8be3bb6a..2490cc7744d2c 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1155,7 +1155,7 @@ vzeroupper # CHECK-NEXT: 2 5 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 vcvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm0, %xmm2 @@ -1170,7 +1170,7 @@ vzeroupper # CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %rcx -# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 vcvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %rcx @@ -1865,7 +1865,7 @@ vzeroupper # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.50 0.50 - - - - 
- - vcvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvtss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2 @@ -1880,7 +1880,7 @@ vzeroupper # CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvttsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvttsd2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvttsd2si (%rax), %rcx -# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvttss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvttss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvttss2si (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s index e93bda0e38c07..e25e56ce84184 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s @@ -217,12 +217,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 cvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2 -# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx # CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx @@ -360,12 +360,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 
- - - 1.00 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2 -# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvttss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - 1.00 - - cvttss2si (%rax), %rcx From 660b243120bcefdc108471c724fec382b062ad62 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 1 Nov 2022 20:06:11 +0100 Subject: [PATCH 324/516] [libc++] Add [[nodiscard]] extensions to ranges algorithms This mirrors what we have done in the classic algorithms Reviewed By: ldionne, #libc Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D137186 --- libcxx/docs/UsingLibcxx.rst | 38 ++++++++ .../__algorithm/ranges_adjacent_find.h | 4 +- libcxx/include/__algorithm/ranges_all_of.h | 4 +- libcxx/include/__algorithm/ranges_any_of.h | 4 +- .../__algorithm/ranges_binary_search.h | 4 +- libcxx/include/__algorithm/ranges_clamp.h | 2 +- libcxx/include/__algorithm/ranges_count.h | 4 +- libcxx/include/__algorithm/ranges_count_if.h | 4 +- libcxx/include/__algorithm/ranges_equal.h | 4 +- .../include/__algorithm/ranges_equal_range.h | 4 +- libcxx/include/__algorithm/ranges_find.h | 4 +- libcxx/include/__algorithm/ranges_find_end.h | 4 +- .../__algorithm/ranges_find_first_of.h | 4 +- libcxx/include/__algorithm/ranges_find_if.h | 4 +- .../include/__algorithm/ranges_find_if_not.h | 4 +- 
libcxx/include/__algorithm/ranges_includes.h | 4 +- libcxx/include/__algorithm/ranges_is_heap.h | 4 +- .../__algorithm/ranges_is_heap_until.h | 4 +- .../__algorithm/ranges_is_partitioned.h | 4 +- .../__algorithm/ranges_is_permutation.h | 4 +- libcxx/include/__algorithm/ranges_is_sorted.h | 4 +- .../__algorithm/ranges_is_sorted_until.h | 4 +- .../ranges_lexicographical_compare.h | 4 +- .../include/__algorithm/ranges_lower_bound.h | 4 +- libcxx/include/__algorithm/ranges_max.h | 6 +- .../include/__algorithm/ranges_max_element.h | 4 +- libcxx/include/__algorithm/ranges_min.h | 6 +- .../include/__algorithm/ranges_min_element.h | 4 +- libcxx/include/__algorithm/ranges_minmax.h | 6 +- .../__algorithm/ranges_minmax_element.h | 4 +- libcxx/include/__algorithm/ranges_mismatch.h | 4 +- libcxx/include/__algorithm/ranges_none_of.h | 4 +- libcxx/include/__algorithm/ranges_remove.h | 4 +- libcxx/include/__algorithm/ranges_remove_if.h | 4 +- libcxx/include/__algorithm/ranges_search.h | 4 +- libcxx/include/__algorithm/ranges_search_n.h | 4 +- libcxx/include/__algorithm/ranges_unique.h | 4 +- .../include/__algorithm/ranges_upper_bound.h | 4 +- ... 
=> nodiscard_extensions.compile.pass.cpp} | 7 +- ...nges.nodiscard_extensions.compile.pass.cpp | 93 +++++++++++++++++++ .../ranges.nodiscard_extensions.verify.cpp | 90 ++++++++++++++++++ .../alg.clamp/assert.ranges_clamp.pass.cpp | 10 +- ...ust_against_differing_projections.pass.cpp | 4 +- ...robust_against_nonbool_predicates.pass.cpp | 32 +++---- ...es_robust_against_omitting_invoke.pass.cpp | 32 +++---- ...es_robust_against_proxy_iterators.pass.cpp | 12 +-- 46 files changed, 344 insertions(+), 126 deletions(-) rename libcxx/test/libcxx/diagnostics/{nodiscard_extensions.pass.cpp => nodiscard_extensions.compile.pass.cpp} (97%) create mode 100644 libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp create mode 100644 libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst index 59a1e4b982f97..e6425d8c7c8b4 100644 --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -393,6 +393,44 @@ which no dialect declares as such (See the second form described above). 
* ``search`` * ``unique`` * ``upper_bound`` +* ``ranges::adjacent_find`` +* ``ranges::all_of`` +* ``ranges::any_of`` +* ``ranges::binary_search`` +* ``ranges::clamp`` +* ``ranges::count_if`` +* ``ranges::count`` +* ``ranges::equal_range`` +* ``ranges::equal`` +* ``ranges::find_end`` +* ``ranges::find_first_of`` +* ``ranges::find_if_not`` +* ``ranges::find_if`` +* ``ranges::find`` +* ``ranges::get_temporary_buffer`` +* ``ranges::includes`` +* ``ranges::is_heap_until`` +* ``ranges::is_heap`` +* ``ranges::is_partitioned`` +* ``ranges::is_permutation`` +* ``ranges::is_sorted_until`` +* ``ranges::is_sorted`` +* ``ranges::lexicographical_compare`` +* ``ranges::lower_bound`` +* ``ranges::max_element`` +* ``ranges::max`` +* ``ranges::min_element`` +* ``ranges::min`` +* ``ranges::minmax_element`` +* ``ranges::minmax`` +* ``ranges::mismatch`` +* ``ranges::none_of`` +* ``ranges::remove_if`` +* ``ranges::remove`` +* ``ranges::search_n`` +* ``ranges::search`` +* ``ranges::unique`` +* ``ranges::upper_bound`` * ``lock_guard``'s constructors * ``as_const`` * ``bit_cast`` diff --git a/libcxx/include/__algorithm/ranges_adjacent_find.h b/libcxx/include/__algorithm/ranges_adjacent_find.h index 4ed306baf474d..d338d13e6eee9 100644 --- a/libcxx/include/__algorithm/ranges_adjacent_find.h +++ b/libcxx/include/__algorithm/ranges_adjacent_find.h @@ -50,7 +50,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_binary_predicate, projected<_Iter, _Proj>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { return __adjacent_find_impl(std::move(__first), std::move(__last), __pred, __proj); } @@ -59,7 +59,7 @@ struct __fn { class _Proj = identity, indirect_binary_predicate, _Proj>, projected, _Proj>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr 
borrowed_iterator_t<_Range> operator()(_Range&& __range, _Pred __pred = {}, _Proj __proj = {}) const { return __adjacent_find_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_all_of.h b/libcxx/include/__algorithm/ranges_all_of.h index f73d069b99c2d..e45c4e5843790 100644 --- a/libcxx/include/__algorithm/ranges_all_of.h +++ b/libcxx/include/__algorithm/ranges_all_of.h @@ -42,14 +42,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { return __all_of_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __all_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_any_of.h b/libcxx/include/__algorithm/ranges_any_of.h index 53627ed5c2e57..e7d1e723a70f4 100644 --- a/libcxx/include/__algorithm/ranges_any_of.h +++ b/libcxx/include/__algorithm/ranges_any_of.h @@ -42,14 +42,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { return __any_of_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __any_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git 
a/libcxx/include/__algorithm/ranges_binary_search.h b/libcxx/include/__algorithm/ranges_binary_search.h index 39a9a02e44dd2..b2a8977652fb9 100644 --- a/libcxx/include/__algorithm/ranges_binary_search.h +++ b/libcxx/include/__algorithm/ranges_binary_search.h @@ -33,7 +33,7 @@ namespace __binary_search { struct __fn { template _Sent, class _Type, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp, __proj); return __ret != __last && !std::invoke(__comp, __value, std::invoke(__proj, *__first)); @@ -41,7 +41,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_clamp.h b/libcxx/include/__algorithm/ranges_clamp.h index 1bb3a5a2ee674..09a97fc790eac 100644 --- a/libcxx/include/__algorithm/ranges_clamp.h +++ b/libcxx/include/__algorithm/ranges_clamp.h @@ -33,7 +33,7 @@ struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr const _Type& operator()(const _Type& __value, const _Type& __low, const _Type& __high, diff --git a/libcxx/include/__algorithm/ranges_count.h b/libcxx/include/__algorithm/ranges_count.h index f790c994f8be2..527dd0620085f 100644 --- a/libcxx/include/__algorithm/ranges_count.h +++ b/libcxx/include/__algorithm/ranges_count.h @@ -34,7 +34,7 @@ namespace __count { struct __fn { template _Sent, class _Type, class _Proj = identity> requires indirect_binary_predicate, const 
_Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> operator()(_Iter __first, _Sent __last, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return __e == __value; }; return ranges::__count_if_impl(std::move(__first), std::move(__last), __pred, __proj); @@ -42,7 +42,7 @@ struct __fn { template requires indirect_binary_predicate, _Proj>, const _Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_difference_t<_Range> operator()(_Range&& __r, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return __e == __value; }; return ranges::__count_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); diff --git a/libcxx/include/__algorithm/ranges_count_if.h b/libcxx/include/__algorithm/ranges_count_if.h index 8cf1d026804c1..931618b7b545b 100644 --- a/libcxx/include/__algorithm/ranges_count_if.h +++ b/libcxx/include/__algorithm/ranges_count_if.h @@ -46,14 +46,14 @@ namespace __count_if { struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Predicate> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> operator()(_Iter __first, _Sent __last, _Predicate __pred, _Proj __proj = {}) const { return ranges::__count_if_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Predicate> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_difference_t<_Range> operator()(_Range&& __r, _Predicate __pred, _Proj __proj = {}) const { return ranges::__count_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_equal.h b/libcxx/include/__algorithm/ranges_equal.h index f7424ffd52ad1..3c417f09de902 100644 --- a/libcxx/include/__algorithm/ranges_equal.h +++ 
b/libcxx/include/__algorithm/ranges_equal.h @@ -60,7 +60,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -83,7 +83,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_equal_range.h b/libcxx/include/__algorithm/ranges_equal_range.h index efe5b2f4193bf..94dc058e7bc15 100644 --- a/libcxx/include/__algorithm/ranges_equal_range.h +++ b/libcxx/include/__algorithm/ranges_equal_range.h @@ -44,7 +44,7 @@ struct __fn { class _Tp, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, const _Tp& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__equal_range<_RangeAlgPolicy>( std::move(__first), std::move(__last), __value, __comp, __proj); @@ -56,7 +56,7 @@ struct __fn { class _Tp, class _Proj = identity, indirect_strict_weak_order, _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, const _Tp& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__equal_range<_RangeAlgPolicy>( ranges::begin(__range), ranges::end(__range), __value, __comp, __proj); diff --git a/libcxx/include/__algorithm/ranges_find.h 
b/libcxx/include/__algorithm/ranges_find.h index 1681eb1876779..580c2a14c9ed9 100644 --- a/libcxx/include/__algorithm/ranges_find.h +++ b/libcxx/include/__algorithm/ranges_find.h @@ -35,7 +35,7 @@ namespace __find { struct __fn { template _Sp, class _Tp, class _Proj = identity> requires indirect_binary_predicate, const _Tp*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, const _Tp& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return std::forward(__e) == __value; }; return ranges::__find_if_impl(std::move(__first), std::move(__last), __pred, __proj); @@ -43,7 +43,7 @@ struct __fn { template requires indirect_binary_predicate, _Proj>, const _Tp*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, const _Tp& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return std::forward(__e) == __value; }; return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); diff --git a/libcxx/include/__algorithm/ranges_find_end.h b/libcxx/include/__algorithm/ranges_find_end.h index df891000b526f..ea36f4d4e6e7e 100644 --- a/libcxx/include/__algorithm/ranges_find_end.h +++ b/libcxx/include/__algorithm/ranges_find_end.h @@ -40,7 +40,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter1> operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -65,7 +65,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range1> 
operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_find_first_of.h b/libcxx/include/__algorithm/ranges_find_first_of.h index 44221c1d5f8cb..9d66e7511c0fb 100644 --- a/libcxx/include/__algorithm/ranges_find_first_of.h +++ b/libcxx/include/__algorithm/ranges_find_first_of.h @@ -54,7 +54,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter1 operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -73,7 +73,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range1> operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_find_if.h b/libcxx/include/__algorithm/ranges_find_if.h index b3f450e79be52..45ce6e460d685 100644 --- a/libcxx/include/__algorithm/ranges_find_if.h +++ b/libcxx/include/__algorithm/ranges_find_if.h @@ -45,14 +45,14 @@ struct __fn { template _Sp, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Pred __pred, _Proj __proj = {}) const { return ranges::__find_if_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Pred __pred, _Proj __proj = {}) const { return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_find_if_not.h 
b/libcxx/include/__algorithm/ranges_find_if_not.h index ffd42ed21d912..3dd12132754bf 100644 --- a/libcxx/include/__algorithm/ranges_find_if_not.h +++ b/libcxx/include/__algorithm/ranges_find_if_not.h @@ -35,7 +35,7 @@ namespace __find_if_not { struct __fn { template _Sp, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Pred __pred, _Proj __proj = {}) const { auto __pred2 = [&](auto&& __e) { return !std::invoke(__pred, std::forward(__e)); }; return ranges::__find_if_impl(std::move(__first), std::move(__last), __pred2, __proj); @@ -43,7 +43,7 @@ struct __fn { template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Pred __pred, _Proj __proj = {}) const { auto __pred2 = [&](auto&& __e) { return !std::invoke(__pred, std::forward(__e)); }; return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred2, __proj); diff --git a/libcxx/include/__algorithm/ranges_includes.h b/libcxx/include/__algorithm/ranges_includes.h index 26cd8d8b8dfb6..8438117cfa808 100644 --- a/libcxx/include/__algorithm/ranges_includes.h +++ b/libcxx/include/__algorithm/ranges_includes.h @@ -43,7 +43,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity, indirect_strict_weak_order, projected<_Iter2, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, @@ -68,7 +68,7 @@ struct __fn { class _Proj2 = identity, indirect_strict_weak_order, _Proj1>, projected, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( _Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, _Proj1 __proj1 = {}, 
_Proj2 __proj2 = {}) const { return std::__includes( ranges::begin(__range1), diff --git a/libcxx/include/__algorithm/ranges_is_heap.h b/libcxx/include/__algorithm/ranges_is_heap.h index 6b1193178028b..a16c075b0763f 100644 --- a/libcxx/include/__algorithm/ranges_is_heap.h +++ b/libcxx/include/__algorithm/ranges_is_heap.h @@ -47,14 +47,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_fn_impl(std::move(__first), std::move(__last), __comp, __proj); } template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_fn_impl(ranges::begin(__range), ranges::end(__range), __comp, __proj); } diff --git a/libcxx/include/__algorithm/ranges_is_heap_until.h b/libcxx/include/__algorithm/ranges_is_heap_until.h index 2ca0d06f6e30a..8c8dac5bc9099 100644 --- a/libcxx/include/__algorithm/ranges_is_heap_until.h +++ b/libcxx/include/__algorithm/ranges_is_heap_until.h @@ -47,14 +47,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_until_fn_impl(std::move(__first), std::move(__last), __comp, __proj); } template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_until_fn_impl(ranges::begin(__range), ranges::end(__range), __comp, 
__proj); } diff --git a/libcxx/include/__algorithm/ranges_is_partitioned.h b/libcxx/include/__algorithm/ranges_is_partitioned.h index ce56378303ebf..b903953d61658 100644 --- a/libcxx/include/__algorithm/ranges_is_partitioned.h +++ b/libcxx/include/__algorithm/ranges_is_partitioned.h @@ -54,7 +54,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { return __is_parititioned_impl(std::move(__first), std::move(__last), __pred, __proj); } @@ -62,7 +62,7 @@ struct __fn { template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __is_parititioned_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_is_permutation.h b/libcxx/include/__algorithm/ranges_is_permutation.h index afee6b5573e76..b617500ea0d87 100644 --- a/libcxx/include/__algorithm/ranges_is_permutation.h +++ b/libcxx/include/__algorithm/ranges_is_permutation.h @@ -49,7 +49,7 @@ struct __fn { class _Proj2 = identity, indirect_equivalence_relation, projected<_Iter2, _Proj2>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __is_permutation_func_impl( @@ -62,7 +62,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity, indirect_equivalence_relation, _Proj1>, projected, _Proj2>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, 
_Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { if constexpr (sized_range<_Range1> && sized_range<_Range2>) { diff --git a/libcxx/include/__algorithm/ranges_is_sorted.h b/libcxx/include/__algorithm/ranges_is_sorted.h index e3550569af289..ce3032ff226e1 100644 --- a/libcxx/include/__algorithm/ranges_is_sorted.h +++ b/libcxx/include/__algorithm/ranges_is_sorted.h @@ -33,7 +33,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__is_sorted_until_impl(std::move(__first), __last, __comp, __proj) == __last; } @@ -41,7 +41,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { auto __last = ranges::end(__range); return ranges::__is_sorted_until_impl(ranges::begin(__range), __last, __comp, __proj) == __last; diff --git a/libcxx/include/__algorithm/ranges_is_sorted_until.h b/libcxx/include/__algorithm/ranges_is_sorted_until.h index 47e98b8f452ec..17fc42e97fd30 100644 --- a/libcxx/include/__algorithm/ranges_is_sorted_until.h +++ b/libcxx/include/__algorithm/ranges_is_sorted_until.h @@ -49,7 +49,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__is_sorted_until_impl(std::move(__first), std::move(__last), __comp, __proj); } @@ -57,7 +57,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr 
borrowed_iterator_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__is_sorted_until_impl(ranges::begin(__range), ranges::end(__range), __comp, __proj); } diff --git a/libcxx/include/__algorithm/ranges_lexicographical_compare.h b/libcxx/include/__algorithm/ranges_lexicographical_compare.h index eab7bbe3e0d11..2972e327169da 100644 --- a/libcxx/include/__algorithm/ranges_lexicographical_compare.h +++ b/libcxx/include/__algorithm/ranges_lexicographical_compare.h @@ -55,7 +55,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity, indirect_strict_weak_order, projected<_Iter2, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp __comp = {}, @@ -74,7 +74,7 @@ struct __fn { class _Proj2 = identity, indirect_strict_weak_order, _Proj1>, projected, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __lexicographical_compare_impl(ranges::begin(__range1), ranges::end(__range1), ranges::begin(__range2), ranges::end(__range2), diff --git a/libcxx/include/__algorithm/ranges_lower_bound.h b/libcxx/include/__algorithm/ranges_lower_bound.h index abcbe82a443d1..78cbb6d4fb245 100644 --- a/libcxx/include/__algorithm/ranges_lower_bound.h +++ b/libcxx/include/__algorithm/ranges_lower_bound.h @@ -37,14 +37,14 @@ namespace __lower_bound { struct __fn { template _Sent, class _Type, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { return 
std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp, __proj); } template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __r, const _Type& __value, _Comp __comp = {}, diff --git a/libcxx/include/__algorithm/ranges_max.h b/libcxx/include/__algorithm/ranges_max.h index f027faa075575..55aef997698c6 100644 --- a/libcxx/include/__algorithm/ranges_max.h +++ b/libcxx/include/__algorithm/ranges_max.h @@ -39,14 +39,14 @@ namespace __max { struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator()(const _Tp& __a, const _Tp& __b, _Comp __comp = {}, _Proj __proj = {}) const { return std::invoke(__comp, std::invoke(__proj, __a), std::invoke(__proj, __b)) ? __b : __a; } template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Tp operator()(initializer_list<_Tp> __il, _Comp __comp = {}, _Proj __proj = {}) const { _LIBCPP_ASSERT(__il.begin() != __il.end(), "initializer_list must contain at least one element"); @@ -57,7 +57,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> requires indirectly_copyable_storable, range_value_t<_Rp>*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_max_element.h b/libcxx/include/__algorithm/ranges_max_element.h index d4c3242e04eba..490f32075a4c1 100644 --- a/libcxx/include/__algorithm/ranges_max_element.h +++ b/libcxx/include/__algorithm/ranges_max_element.h @@ -33,7 +33,7 @@ namespace __max_element { struct __fn { template _Sp, class _Proj = identity, indirect_strict_weak_order> _Comp = 
ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); }; return ranges::__min_element_impl(__first, __last, __comp_lhs_rhs_swapped, __proj); @@ -41,7 +41,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); }; return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp_lhs_rhs_swapped, __proj); diff --git a/libcxx/include/__algorithm/ranges_min.h b/libcxx/include/__algorithm/ranges_min.h index 8152a411f3832..0e31f57fb8dde 100644 --- a/libcxx/include/__algorithm/ranges_min.h +++ b/libcxx/include/__algorithm/ranges_min.h @@ -38,14 +38,14 @@ namespace __min { struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator()(const _Tp& __a, const _Tp& __b, _Comp __comp = {}, _Proj __proj = {}) const { return std::invoke(__comp, std::invoke(__proj, __b), std::invoke(__proj, __a)) ? 
__b : __a; } template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Tp operator()(initializer_list<_Tp> __il, _Comp __comp = {}, _Proj __proj = {}) const { _LIBCPP_ASSERT(__il.begin() != __il.end(), "initializer_list must contain at least one element"); return *ranges::__min_element_impl(__il.begin(), __il.end(), __comp, __proj); @@ -54,7 +54,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> requires indirectly_copyable_storable, range_value_t<_Rp>*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_min_element.h b/libcxx/include/__algorithm/ranges_min_element.h index 66d649971465f..1751874d03bb8 100644 --- a/libcxx/include/__algorithm/ranges_min_element.h +++ b/libcxx/include/__algorithm/ranges_min_element.h @@ -48,14 +48,14 @@ namespace __min_element { struct __fn { template _Sp, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__min_element_impl(__first, __last, __comp, __proj); } template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); } diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 377fe9b4a26d4..f82e00551e47b 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -45,7 
+45,7 @@ namespace __minmax { struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result operator()(const _Type& __a, const _Type& __b, _Comp __comp = {}, _Proj __proj = {}) const { if (std::invoke(__comp, std::invoke(__proj, __b), std::invoke(__proj, __a))) return {__b, __a}; @@ -54,7 +54,7 @@ struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result<_Type> operator()(initializer_list<_Type> __il, _Comp __comp = {}, _Proj __proj = {}) const { _LIBCPP_ASSERT(__il.begin() != __il.end(), "initializer_list has to contain at least one element"); auto __iters = std::__minmax_element_impl(__il.begin(), __il.end(), __comp, __proj); @@ -64,7 +64,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> requires indirectly_copyable_storable, range_value_t<_Range>*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_minmax_element.h b/libcxx/include/__algorithm/ranges_minmax_element.h index 9d1093b648a28..6699f9626e1bf 100644 --- a/libcxx/include/__algorithm/ranges_minmax_element.h +++ b/libcxx/include/__algorithm/ranges_minmax_element.h @@ -42,7 +42,7 @@ namespace __minmax_element { struct __fn { template _Sp, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_element_result<_Ip> operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__minmax_element_impl(std::move(__first), std::move(__last), __comp, __proj); return {__ret.first, 
__ret.second}; @@ -50,7 +50,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_element_result> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__minmax_element_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); diff --git a/libcxx/include/__algorithm/ranges_mismatch.h b/libcxx/include/__algorithm/ranges_mismatch.h index 4775daf4f7f69..4fd051792838c 100644 --- a/libcxx/include/__algorithm/ranges_mismatch.h +++ b/libcxx/include/__algorithm/ranges_mismatch.h @@ -55,7 +55,7 @@ struct __fn { input_iterator _I2, sentinel_for<_I2> _S2, class _Pred = ranges::equal_to, class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_I1, _I2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result<_I1, _I2> operator()(_I1 __first1, _S1 __last1, _I2 __first2, _S2 __last2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __go(std::move(__first1), __last1, std::move(__first2), __last2, __pred, __proj1, __proj2); @@ -64,7 +64,7 @@ struct __fn { template requires indirectly_comparable, iterator_t<_R2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result, borrowed_iterator_t<_R2>> operator()(_R1&& __r1, _R2&& __r2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __go(ranges::begin(__r1), ranges::end(__r1), ranges::begin(__r2), ranges::end(__r2), diff --git a/libcxx/include/__algorithm/ranges_none_of.h b/libcxx/include/__algorithm/ranges_none_of.h index d93b630bde2b5..b39e570fd33a8 100644 --- a/libcxx/include/__algorithm/ranges_none_of.h +++ b/libcxx/include/__algorithm/ranges_none_of.h @@ -42,14 +42,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - 
_LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { return __none_of_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __none_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_remove.h b/libcxx/include/__algorithm/ranges_remove.h index eb53a5db1b5aa..dd5c5fb4536ac 100644 --- a/libcxx/include/__algorithm/ranges_remove.h +++ b/libcxx/include/__algorithm/ranges_remove.h @@ -35,7 +35,7 @@ struct __fn { template _Sent, class _Type, class _Proj = identity> requires indirect_binary_predicate, const _Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __other) { return __value == __other; }; return ranges::__remove_if_impl(std::move(__first), std::move(__last), __pred, __proj); @@ -44,7 +44,7 @@ struct __fn { template requires permutable> && indirect_binary_predicate, _Proj>, const _Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __other) { return __value == __other; }; return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); diff --git a/libcxx/include/__algorithm/ranges_remove_if.h b/libcxx/include/__algorithm/ranges_remove_if.h index c2e9052d3b313..1f17467fc43eb 100644 --- a/libcxx/include/__algorithm/ranges_remove_if.h +++ b/libcxx/include/__algorithm/ranges_remove_if.h @@ -56,7 
+56,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { return ranges::__remove_if_impl(std::move(__first), std::move(__last), __pred, __proj); } @@ -65,7 +65,7 @@ struct __fn { class _Proj = identity, indirect_unary_predicate, _Proj>> _Pred> requires permutable> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_search.h b/libcxx/include/__algorithm/ranges_search.h index 24bbe28ead4d1..388d5afa499d6 100644 --- a/libcxx/include/__algorithm/ranges_search.h +++ b/libcxx/include/__algorithm/ranges_search.h @@ -75,7 +75,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter1> operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -90,7 +90,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range1> operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_search_n.h b/libcxx/include/__algorithm/ranges_search_n.h index d2846f6c5c874..f44afde03e99a 100644 --- a/libcxx/include/__algorithm/ranges_search_n.h +++ b/libcxx/include/__algorithm/ranges_search_n.h @@ -76,7 +76,7 @@ struct __fn { 
class _Pred = ranges::equal_to, class _Proj = identity> requires indirectly_comparable<_Iter, const _Type*, _Pred, _Proj> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, iter_difference_t<_Iter> __count, const _Type& __value, @@ -87,7 +87,7 @@ struct __fn { template requires indirectly_comparable, const _Type*, _Pred, _Proj> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, range_difference_t<_Range> __count, const _Type& __value, diff --git a/libcxx/include/__algorithm/ranges_unique.h b/libcxx/include/__algorithm/ranges_unique.h index 45e54276ed55a..be427ccf7fad8 100644 --- a/libcxx/include/__algorithm/ranges_unique.h +++ b/libcxx/include/__algorithm/ranges_unique.h @@ -45,7 +45,7 @@ namespace __unique { sentinel_for<_Iter> _Sent, class _Proj = identity, indirect_equivalence_relation> _Comp = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__unique<_RangeAlgPolicy>( std::move(__first), std::move(__last), std::__make_projected(__comp, __proj)); @@ -57,7 +57,7 @@ namespace __unique { class _Proj = identity, indirect_equivalence_relation, _Proj>> _Comp = ranges::equal_to> requires permutable> - _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__unique<_RangeAlgPolicy>( ranges::begin(__range), ranges::end(__range), std::__make_projected(__comp, __proj)); diff --git a/libcxx/include/__algorithm/ranges_upper_bound.h b/libcxx/include/__algorithm/ranges_upper_bound.h index ec1addd520a61..a1340809048c6 
100644 --- a/libcxx/include/__algorithm/ranges_upper_bound.h +++ b/libcxx/include/__algorithm/ranges_upper_bound.h @@ -34,7 +34,7 @@ namespace __upper_bound { struct __fn { template _Sent, class _Type, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](const auto& __lhs, const auto& __rhs) { return !std::invoke(__comp, __rhs, __lhs); @@ -45,7 +45,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __r, const _Type& __value, _Comp __comp = {}, diff --git a/libcxx/test/libcxx/diagnostics/nodiscard_extensions.pass.cpp b/libcxx/test/libcxx/diagnostics/nodiscard_extensions.compile.pass.cpp similarity index 97% rename from libcxx/test/libcxx/diagnostics/nodiscard_extensions.pass.cpp rename to libcxx/test/libcxx/diagnostics/nodiscard_extensions.compile.pass.cpp index a923301fe8b01..e0d457bd844b8 100644 --- a/libcxx/test/libcxx/diagnostics/nodiscard_extensions.pass.cpp +++ b/libcxx/test/libcxx/diagnostics/nodiscard_extensions.compile.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // Test that entities declared [[nodiscard]] as at extension by libc++, are -// declared as such when _LIBCPP_DISABLE_NODISCARD_EXT is specified. - -// This test intentionally leaks memory, so it is unsupported under ASAN. -// UNSUPPORTED: asan +// not declared as such when _LIBCPP_DISABLE_NODISCARD_EXT is specified. // All entities to which libc++ applies [[nodiscard]] as an extension should // be tested here and in nodiscard_extensions.verify.cpp. 
They should also @@ -71,7 +68,7 @@ void test_algorithms() { std::find_if_not(std::begin(arr), std::end(arr), P()); std::find_if(std::begin(arr), std::end(arr), P()); std::find(std::begin(arr), std::end(arr), 1); - std::get_temporary_buffer(1); // intentional memory leak. + std::get_temporary_buffer(1); std::includes(std::begin(arr), std::end(arr), std::begin(arr), std::end(arr)); std::includes(std::begin(arr), std::end(arr), std::begin(arr), std::end(arr), std::greater()); diff --git a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp new file mode 100644 index 0000000000000..1577601908c99 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp @@ -0,0 +1,93 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Check that ranges algorithms aren't marked [[nodiscard]] when +// _LIBCPP_DISABLE_NODISCARD_EXT is defined + +// UNSUPPORTED: c++03, c++11, c++14 ,c++17 + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_NODISCARD_EXT + +#include + +void test() { + int range[1]; + int* iter = range; + auto pred = [](auto...)
{ return true; }; + std::ranges::adjacent_find(range); + std::ranges::adjacent_find(iter, iter); + std::ranges::all_of(range, pred); + std::ranges::all_of(iter, iter, pred); + std::ranges::any_of(range, pred); + std::ranges::any_of(iter, iter, pred); + std::ranges::binary_search(range, 1); + std::ranges::binary_search(iter, iter, 1); + std::ranges::clamp(1, 2, 3); + std::ranges::count_if(range, pred); + std::ranges::count_if(iter, iter, pred); + std::ranges::count(range, 1); + std::ranges::count(iter, iter, 1); + std::ranges::equal_range(range, 1); + std::ranges::equal_range(iter, iter, 1); + std::ranges::equal(range, range); + std::ranges::equal(iter, iter, iter, iter); + std::ranges::find_end(range, range); + std::ranges::find_end(iter, iter, iter, iter); + std::ranges::find_first_of(range, range); + std::ranges::find_first_of(iter, iter, iter, iter); + std::ranges::find_if_not(range, pred); + std::ranges::find_if_not(iter, iter, pred); + std::ranges::find_if(range, pred); + std::ranges::find_if(iter, iter, pred); + std::ranges::find(range, 1); + std::ranges::find(iter, iter, 1); + std::ranges::includes(range, range); + std::ranges::includes(iter, iter, iter, iter); + std::ranges::is_heap_until(range); + std::ranges::is_heap_until(iter, iter); + std::ranges::is_heap(range); + std::ranges::is_heap(iter, iter); + std::ranges::is_partitioned(range, pred); + std::ranges::is_partitioned(iter, iter, pred); + std::ranges::is_permutation(range, range); + std::ranges::is_permutation(iter, iter, iter, iter); + std::ranges::is_sorted_until(range); + std::ranges::is_sorted_until(iter, iter); + std::ranges::is_sorted(range); + std::ranges::is_sorted(iter, iter); + std::ranges::lexicographical_compare(range, range); + std::ranges::lexicographical_compare(iter, iter, iter, iter); + std::ranges::lower_bound(range, 1); + std::ranges::lower_bound(iter, iter, 1); + std::ranges::max_element(range); + std::ranges::max_element(iter, iter); + std::ranges::max(1, 2); + 
std::ranges::max({1, 2, 3}); + std::ranges::max(range); + std::ranges::minmax_element(range); + std::ranges::minmax_element(iter, iter); + std::ranges::minmax(1, 2); + std::ranges::minmax({1, 2, 3}); + std::ranges::minmax(range); + std::ranges::mismatch(range, range); + std::ranges::mismatch(iter, iter, iter, iter); + std::ranges::none_of(range, pred); + std::ranges::none_of(iter, iter, pred); + std::ranges::remove_if(range, pred); + std::ranges::remove_if(iter, iter, pred); + std::ranges::remove(range, 1); + std::ranges::remove(iter, iter, 1); + std::ranges::search_n(range, 1, 1); + std::ranges::search_n(iter, iter, 1, 1); + std::ranges::search(range, range); + std::ranges::search(iter, iter, iter, iter); + std::ranges::unique(range); + std::ranges::unique(iter, iter); + std::ranges::upper_bound(range, 1); + std::ranges::upper_bound(iter, iter, 1); +} diff --git a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp new file mode 100644 index 0000000000000..77ac5f3f77903 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Check that ranges algorithms are marked [[nodiscard]] as a conforming extension + +// UNSUPPORTED: c++03, c++11, c++14 ,c++17 + +#include + +void test() { + int range[1]; + int* iter = range; + auto pred = [](auto...) 
{ return true; }; + std::ranges::adjacent_find(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::adjacent_find(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::all_of(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::all_of(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::any_of(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::any_of(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::binary_search(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::binary_search(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::clamp(1, 2, 3); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::count_if(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::count_if(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::count(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::count(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::equal_range(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::equal_range(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' 
attribute}} + std::ranges::equal(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::equal(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_end(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_end(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_first_of(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_first_of(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if_not(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if_not(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::includes(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::includes(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_heap_until(range); // expected-warning {{ignoring return value 
of function declared with 'nodiscard' attribute}} + std::ranges::is_heap_until(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_heap(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_heap(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_partitioned(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_partitioned(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_permutation(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_permutation(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted_until(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted_until(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::lexicographical_compare(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::lexicographical_compare(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::lower_bound(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + 
std::ranges::lower_bound(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max_element(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max_element(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max(1, 2); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max({1, 2, 3}); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax_element(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax_element(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax(1, 2); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax({1, 2, 3}); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::mismatch(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::mismatch(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::none_of(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::none_of(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove_if(range, pred); // 
expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove_if(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search_n(range, 1, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search_n(iter, iter, 1, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::unique(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::unique(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::upper_bound(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::upper_bound(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +} diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp index b255b5933a49c..1ea5199a6fd21 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp @@ -21,14 +21,14 @@ #include "check_assertion.h" int main(int, 
char**) { - std::ranges::clamp(1, 2, 0, std::ranges::greater{}); + (void)std::ranges::clamp(1, 2, 0, std::ranges::greater{}); TEST_LIBCPP_ASSERT_FAILURE(std::ranges::clamp(1, 2, 0), "Bad bounds passed to std::ranges::clamp"); - std::ranges::clamp(1, 0, 2); - TEST_LIBCPP_ASSERT_FAILURE(std::ranges::clamp(1, 0, 2, std::ranges::greater{}), - "Bad bounds passed to std::ranges::clamp"); + (void)std::ranges::clamp(1, 0, 2); + TEST_LIBCPP_ASSERT_FAILURE( + std::ranges::clamp(1, 0, 2, std::ranges::greater{}), "Bad bounds passed to std::ranges::clamp"); - std::ranges::clamp(1, 1, 1); // Equal bounds should be fine. + (void)std::ranges::clamp(1, 1, 1); // Equal bounds should be fine. return 0; } diff --git a/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp index 64b3b9db0c30d..d940a7ab97006 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp @@ -24,8 +24,8 @@ // (in1, in2, ...) template constexpr void test(Func&& func, Input1& in1, Input2& in2, Args&& ...args) { - func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); - func(in1, in2, std::forward(args)...); + (void)func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); + (void)func(in1, in2, std::forward(args)...); } constexpr bool test_all() { diff --git a/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp index 448b8bc018ace..b69df4084052b 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp @@ -36,22 +36,22 @@ static_assert(std::convertible_to); // (in, ...) template constexpr void test(Func&& func, Input& in, Args&&... 
args) { - func(in.begin(), in.end(), std::forward(args)...); - func(in, std::forward(args)...); + (void)func(in.begin(), in.end(), std::forward(args)...); + (void)func(in, std::forward(args)...); } // (in1, in2, ...) template constexpr void test(Func&& func, Input& in1, Input& in2, Args&&... args) { - func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); - func(in1, in2, std::forward(args)...); + (void)func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); + (void)func(in1, in2, std::forward(args)...); } // (in, mid, ...) template constexpr void test_mid(Func&& func, Input& in, std::ranges::iterator_t mid, Args&&... args) { - func(in.begin(), mid, in.end(), std::forward(args)...); - func(in, mid, std::forward(args)...); + (void)func(in.begin(), mid, in.end(), std::forward(args)...); + (void)func(in, mid, std::forward(args)...); } constexpr bool test_all() { @@ -83,17 +83,17 @@ constexpr bool test_all() { test(std::ranges::binary_search, in, x, binary_pred); // min - std::ranges::min(1, 2, binary_pred); - std::ranges::min(std::initializer_list{1, 2}, binary_pred); - std::ranges::min(in, binary_pred); + (void)std::ranges::min(1, 2, binary_pred); + (void)std::ranges::min(std::initializer_list{1, 2}, binary_pred); + (void)std::ranges::min(in, binary_pred); // max - std::ranges::max(1, 2, binary_pred); - std::ranges::max(std::initializer_list{1, 2}, binary_pred); - std::ranges::max(in, binary_pred); + (void)std::ranges::max(1, 2, binary_pred); + (void)std::ranges::max(std::initializer_list{1, 2}, binary_pred); + (void)std::ranges::max(in, binary_pred); // minmax - std::ranges::minmax(1, 2, binary_pred); - std::ranges::minmax(std::initializer_list{1, 2}, binary_pred); - std::ranges::minmax(in, binary_pred); + (void)std::ranges::minmax(1, 2, binary_pred); + (void)std::ranges::minmax(std::initializer_list{1, 2}, binary_pred); + (void)std::ranges::minmax(in, binary_pred); test(std::ranges::min_element, in, binary_pred); 
test(std::ranges::max_element, in, binary_pred); @@ -108,7 +108,7 @@ constexpr bool test_all() { test(std::ranges::includes, in, in2, binary_pred); test(std::ranges::is_heap, in, binary_pred); test(std::ranges::is_heap_until, in, binary_pred); - std::ranges::clamp(2, 1, 3, binary_pred); + (void)std::ranges::clamp(2, 1, 3, binary_pred); test(std::ranges::is_permutation, in, in2, binary_pred); test(std::ranges::copy_if, in, out, unary_pred); test(std::ranges::remove_copy_if, in, out, unary_pred); diff --git a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp index 9831020877579..35b9f928b739f 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp @@ -37,22 +37,22 @@ struct Bar { // (in, ...) template constexpr void test(Func&& func, Input& in, Args&&... args) { - func(in.begin(), in.end(), std::forward(args)...); - func(in, std::forward(args)...); + (void)func(in.begin(), in.end(), std::forward(args)...); + (void)func(in, std::forward(args)...); } // (in1, in2, ...) template constexpr void test(Func&& func, Input& in1, Input& in2, Args&&... args) { - func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); - func(in1, in2, std::forward(args)...); + (void)func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); + (void)func(in1, in2, std::forward(args)...); } // (in, mid, ...) template constexpr void test_mid(Func&& func, Input& in, std::ranges::iterator_t mid, Args&&... 
args) { - func(in.begin(), mid, in.end(), std::forward(args)...); - func(in, mid, std::forward(args)...); + (void)func(in.begin(), mid, in.end(), std::forward(args)...); + (void)func(in, mid, std::forward(args)...); } constexpr bool test_all() { @@ -89,17 +89,17 @@ constexpr bool test_all() { test(std::ranges::binary_search, in, x, &Foo::binary_pred, &Bar::val); // min - std::ranges::min(a, b, &Foo::binary_pred, &Bar::val); - std::ranges::min(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); - std::ranges::min(in, &Foo::binary_pred, &Bar::val); + (void)std::ranges::min(a, b, &Foo::binary_pred, &Bar::val); + (void)std::ranges::min(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); + (void)std::ranges::min(in, &Foo::binary_pred, &Bar::val); // max - std::ranges::max(a, b, &Foo::binary_pred, &Bar::val); - std::ranges::max(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); - std::ranges::max(in, &Foo::binary_pred, &Bar::val); + (void)std::ranges::max(a, b, &Foo::binary_pred, &Bar::val); + (void)std::ranges::max(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); + (void)std::ranges::max(in, &Foo::binary_pred, &Bar::val); // minmax - std::ranges::minmax(a, b, &Foo::binary_pred, &Bar::val); - std::ranges::minmax(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); - std::ranges::minmax(in, &Foo::binary_pred, &Bar::val); + (void)std::ranges::minmax(a, b, &Foo::binary_pred, &Bar::val); + (void)std::ranges::minmax(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); + (void)std::ranges::minmax(in, &Foo::binary_pred, &Bar::val); test(std::ranges::min_element, in, &Foo::binary_pred, &Bar::val); test(std::ranges::max_element, in, &Foo::binary_pred, &Bar::val); @@ -115,7 +115,7 @@ constexpr bool test_all() { test(std::ranges::includes, in, in2, &Foo::binary_pred, &Bar::val, &Bar::val); test(std::ranges::is_heap, in, &Foo::binary_pred, &Bar::val); test(std::ranges::is_heap_until, in, &Foo::binary_pred, &Bar::val); - 
std::ranges::clamp(b, a, c, &Foo::binary_pred, &Bar::val); + (void)std::ranges::clamp(b, a, c, &Foo::binary_pred, &Bar::val); test(std::ranges::is_permutation, in, in2, &Foo::binary_pred, &Bar::val, &Bar::val); test(std::ranges::for_each, in, &Foo::unary_pred, &Bar::val); std::ranges::for_each_n(in.begin(), count, &Foo::unary_pred, &Bar::val); diff --git a/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp index 3458336aefb8a..d383e18adbb99 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp @@ -29,22 +29,22 @@ // (in, ...) template constexpr void test(Func&& func, Input& in, Args&& ...args) { - func(in.begin(), in.end(), std::forward(args)...); - func(in, std::forward(args)...); + (void)func(in.begin(), in.end(), std::forward(args)...); + (void)func(in, std::forward(args)...); } // (in1, in2, ...) template constexpr void test(Func&& func, Range1& r1, Range2& r2, Args&& ...args) { - func(r1.begin(), r1.end(), r2.begin(), r2.end(), std::forward(args)...); - func(r1, r2, std::forward(args)...); + (void)func(r1.begin(), r1.end(), r2.begin(), r2.end(), std::forward(args)...); + (void)func(r1, r2, std::forward(args)...); } // (in, mid, ...) 
template constexpr void test_mid(Func&& func, Input& in, std::ranges::iterator_t mid, Args&& ...args) { - func(in.begin(), mid, in.end(), std::forward(args)...); - func(in, mid, std::forward(args)...); + (void)func(in.begin(), mid, in.end(), std::forward(args)...); + (void)func(in, mid, std::forward(args)...); } std::mt19937 rand_gen() { return std::mt19937(); } From 2c79186bceeff8da4c7723039d5555b757aa6e91 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 15:45:45 +0000 Subject: [PATCH 325/516] [X86] Cleanup WriteCvtSD2SS/WriteCvtPD2PS overrides The WriteCvtSD2SS/WriteCvtPD2PS* classes were mostly unused as the models were needlessly overriding all instructions - in some cases the folded pattern overrides were entirely missing (but I've confirmed they just have an additional Port23 use) There were a couple of typos (confirmed with Agner/uops.info) - Skylake/Icelake uses Port5+Port01 for XMM/YMM, Skylake uses Port5+Port05 for ZMM but Icelake uses Port5+Port0 --- llvm/lib/Target/X86/X86SchedBroadwell.td | 17 ++++------ llvm/lib/Target/X86/X86SchedHaswell.td | 22 +++++-------- llvm/lib/Target/X86/X86SchedIceLake.td | 31 +++---------------- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 21 +++---------- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 31 +++---------------- .../llvm-mca/X86/Broadwell/resources-avx1.s | 6 ++-- .../llvm-mca/X86/Haswell/resources-avx1.s | 6 ++-- .../X86/IceLakeServer/resources-avx1.s | 16 +++++----- .../X86/IceLakeServer/resources-sse2.s | 10 +++--- .../X86/SkylakeClient/resources-avx1.s | 14 ++++----- .../X86/SkylakeClient/resources-sse2.s | 6 ++-- .../X86/SkylakeServer/resources-avx1.s | 16 +++++----- .../X86/SkylakeServer/resources-sse2.s | 12 +++---- 13 files changed, 72 insertions(+), 136 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 84b36f1a84c19..003a4e376adb3 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ 
b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -379,9 +379,9 @@ defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; defm : X86WriteResPairUnsupported; -defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; @@ -879,8 +879,6 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> { } def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr", "MMX_CVT(T?)PS2PIrr", - "(V?)CVTPD2PSrr", - "(V?)CVTSD2SSrr", "(V?)CVTSI642SDrr", "(V?)CVTSI2SDrr", "(V?)CVTSI2SSrr", @@ -1003,8 +1001,7 @@ def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2PSYrr, - VCVTPD2DQYrr, +def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr)>; def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> { @@ -1245,11 +1242,9 @@ def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm, VCVTPD2PSrm, - CVTPD2DQrm, VCVTPD2DQrm, +def: InstRW<[BWWriteResGroup107], (instrs CVTPD2DQrm, VCVTPD2DQrm, CVTTPD2DQrm, VCVTTPD2DQrm)>; -def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm", - "(V?)CVTSD2SSrm")>; +def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm")>; def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> { let Latency = 9; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 4c646d47a90cc..6cff9c30ee161 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -377,10 +377,10 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : 
HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; @@ -1392,9 +1392,7 @@ def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPD2PIrr, MMX_CVTPS2PIrr, MMX_CVTTPD2PIrr, MMX_CVTTPS2PIrr)>; -def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTPD2PSrr", - "(V?)CVTSD2SSrr", - "(V?)CVTSI(64)?2SDrr", +def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTSI(64)?2SDrr", "(V?)CVTSI2SSrr", "(V?)CVT(T?)PD2DQrr")>; @@ -1428,8 +1426,7 @@ def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm, VCVTPD2PSrm, - CVTPD2DQrm, VCVTPD2DQrm, +def: InstRW<[HWWriteResGroup78], (instrs CVTPD2DQrm, VCVTPD2DQrm, CVTTPD2DQrm, VCVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; @@ -1439,9 +1436,7 @@ def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm, - CVTSD2SSrm, CVTSD2SSrm_Int, - VCVTSD2SSrm, VCVTSD2SSrm_Int)>; +def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>; def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { let Latency = 9; @@ -1548,8 +1543,7 @@ def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2PSYrr, - VCVTPD2DQYrr, +def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr)>; def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> { diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 7a1e6c06c8857..fe812a2d71ecf 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -473,10 +473,10 @@ defm : ICXWriteResPair; defm : ICXWriteResPair; defm : ICXWriteResPair; defm : 
ICXWriteResPair; -defm : ICXWriteResPair; -defm : ICXWriteResPair; -defm : ICXWriteResPair; -defm : ICXWriteResPair; +defm : ICXWriteResPair; +defm : ICXWriteResPair; +defm : ICXWriteResPair; +defm : ICXWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -1085,15 +1085,12 @@ def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr", "VCVTDQ2PDZ128rr", "VCVTPD2DQZ128rr", "(V?)CVT(T?)PD2DQrr", - "VCVTPD2PSZ128rr", - "(V?)CVTPD2PSrr", "VCVTPD2UDQZ128rr", "VCVTPS2PDZ128rr", "(V?)CVTPS2PDrr", "VCVTPS2QQZ128rr", "VCVTPS2UQQZ128rr", "VCVTQQ2PSZ128rr", - "(V?)CVTSD2SS(Z?)rr", "(V?)CVTSI(64)?2SDrr", "VCVTSI2SSZrr", "(V?)CVTSI2SSrr", @@ -1336,7 +1333,6 @@ def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort015]> { } def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", "VCVTPD2DQ(Y|Z256)rr", - "VCVTPD2PS(Y|Z256)rr", "VCVTPD2UDQZ256rr", "VCVTPS2PD(Y|Z256)rr", "VCVTPS2QQZ256rr", @@ -1356,7 +1352,6 @@ def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> { } def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr, VCVTPD2DQZrr, - VCVTPD2PSZrr, VCVTPD2UDQZrr, VCVTPS2PDZrr, VCVTPS2QQZrr, @@ -1870,13 +1865,6 @@ def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", "VPEXPANDDZ128rm(b?)", "VPEXPANDQZ128rm(b?)")>; -def ICXWriteResGroup153 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[ICXWriteResGroup153], (instregex "(V?)CVTSD2SSrm")>; - def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> { let Latency = 10; let NumMicroOps = 4; @@ -1933,13 +1921,6 @@ def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", "VPEXPANDD(Z|Z256)rm(b?)", "VPEXPANDQ(Z|Z256)rm(b?)")>; -def ICXWriteResGroup163 : SchedWriteRes<[ICXPort23,ICXPort015]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[ICXWriteResGroup163], (instregex "VCVTSD2SSZrm")>; - def ICXWriteResGroup164 : 
SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { let Latency = 11; let NumMicroOps = 3; @@ -1952,8 +1933,7 @@ def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2PSrm, - CVTPD2DQrm, +def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; @@ -2068,7 +2048,6 @@ def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { let ResourceCycles = [1,1,1]; } def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", - "VCVTPD2PSZrm(b?)", "VCVTPD2UDQZrm(b?)", "VCVTQQ2PSZrm(b?)", "VCVTTPD2DQZrm(b?)", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ef8a5efeb76cf..59d7c61a3f08a 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -464,9 +464,9 @@ defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; -defm : SKLWriteResPair; -defm : SKLWriteResPair; -defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; @@ -944,9 +944,7 @@ def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PD2PIrr", "MMX_CVT(T?)PS2PIrr", "(V?)CVT(T?)PD2DQrr", - "(V?)CVTPD2PSrr", "(V?)CVTPS2PDrr", - "(V?)CVTSD2SSrr", "(V?)CVTSI642SDrr", "(V?)CVTSI2SDrr", "(V?)CVTSI2SSrr", @@ -1121,8 +1119,7 @@ def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup89], (instrs VCVTPD2PSYrr, - VCVTPS2PDYrr, +def: InstRW<[SKLWriteResGroup89], (instrs VCVTPS2PDYrr, VCVTPD2DQYrr, VCVTTPD2DQYrr)>; @@ -1346,13 +1343,6 @@ def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { } def: 
InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDrm)>; -def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup139], (instregex "(V?)CVTSD2SSrm")>; - def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { let Latency = 10; let NumMicroOps = 4; @@ -1407,8 +1397,7 @@ def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2PSrm, - CVTPD2DQrm, +def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 68820cb8bbf21..da1b47e98d774 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -465,10 +465,10 @@ defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -1066,15 +1066,12 @@ def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr", "VCVTDQ2PDZ128rr", "VCVTPD2DQZ128rr", "(V?)CVT(T?)PD2DQrr", - "VCVTPD2PSZ128rr", - "(V?)CVTPD2PSrr", "VCVTPD2UDQZ128rr", "VCVTPS2PDZ128rr", "(V?)CVTPS2PDrr", "VCVTPS2QQZ128rr", "VCVTPS2UQQZ128rr", "VCVTQQ2PSZ128rr", - "(V?)CVTSD2SS(Z?)rr", "(V?)CVTSI(64)?2SDrr", "VCVTSI2SSZrr", "(V?)CVTSI2SSrr", @@ -1322,7 +1319,6 @@ def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> { } def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", "VCVTPD2DQ(Y|Z256)rr", - "VCVTPD2PS(Y|Z256)rr", "VCVTPD2UDQZ256rr", "VCVTPS2PD(Y|Z256)rr", "VCVTPS2QQZ256rr", @@ -1342,7 +1338,6 @@ def 
SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> { } def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr, VCVTPD2DQZrr, - VCVTPD2PSZrr, VCVTPD2UDQZrr, VCVTPS2PDZrr, VCVTPS2QQZrr, @@ -1851,13 +1846,6 @@ def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", "VPEXPANDDZ128rm(b?)", "VPEXPANDQZ128rm(b?)")>; -def SKXWriteResGroup153 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup153], (instregex "(V?)CVTSD2SSrm")>; - def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { let Latency = 10; let NumMicroOps = 4; @@ -1914,13 +1902,6 @@ def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", "VPEXPANDD(Z|Z256)rm(b?)", "VPEXPANDQ(Z|Z256)rm(b?)")>; -def SKXWriteResGroup163 : SchedWriteRes<[SKXPort23,SKXPort015]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKXWriteResGroup163], (instregex "VCVTSD2SSZrm")>; - def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 11; let NumMicroOps = 3; @@ -1933,8 +1914,7 @@ def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2PSrm, - CVTPD2DQrm, +def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; @@ -2049,7 +2029,6 @@ def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let ResourceCycles = [1,1,1]; } def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", - "VCVTPD2PSZrm(b?)", "VCVTPD2UDQZrm(b?)", "VCVTQQ2PSZrm(b?)", "VCVTTPD2DQZrm(b?)", diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index f69fed1cd4aa1..27c6120d84987 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ 
b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1129,7 +1129,7 @@ vzeroupper # CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 12 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm0, %ymm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 429.25 2.25 12.67 +# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 430.25 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1839,7 +1839,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %ymm0, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index 6146a207e49e1..ec5c773330c86 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1129,7 +1129,7 @@ vzeroupper # CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 12 1.00 * vcvtpd2psy (%rax), %xmm2 # 
CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm0, %ymm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 432.58 2.25 12.67 +# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 433.58 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1839,7 +1839,7 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %ymm0, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s index 2219cf0ebfc88..fa0720f4cef57 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s @@ -1127,9 +1127,9 @@ vzeroupper # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 @@ -1738,7 +1738,7 @@ vzeroupper 
# CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 126.00 318.08 228.08 173.17 173.17 34.00 305.58 6.25 12.67 - - +# CHECK-NEXT: - 126.00 319.25 228.25 173.17 173.17 34.00 305.25 6.25 12.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1838,9 +1838,9 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - - - vcvtpd2psx (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm2 @@ -1854,8 +1854,8 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 - - - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - vcvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - vcvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - 
vcvtsi2sd %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s index cfdf730d86adf..4720831bf5f3d 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s @@ -691,7 +691,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 40.00 103.92 95.92 63.50 63.50 14.00 83.92 2.25 5.00 - - +# CHECK-NEXT: - 40.00 104.58 96.58 63.50 63.50 14.00 82.58 2.25 5.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -718,8 +718,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - cvtps2dq %xmm0, %xmm2 @@ -730,8 +730,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - - - cvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - cvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - cvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: 
- - 0.50 0.50 - - - 1.00 - - - - cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - - - cvtsi2sdl (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s index 2490cc7744d2c..f28cd83cf8d83 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1127,9 +1127,9 @@ vzeroupper # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 13 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 126.00 332.08 203.08 173.17 173.17 34.00 323.58 5.25 12.67 +# CHECK-NEXT: - 126.00 333.42 202.42 173.17 173.17 34.00 324.92 5.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1836,10 +1836,10 @@ vzeroupper # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - 
- 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2 @@ -1852,7 +1852,7 @@ vzeroupper # CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s index 52ad5dbcdb25f..082346c542b47 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 40.00 110.92 78.92 63.50 63.50 14.00 96.92 2.25 5.00 +# CHECK-NEXT: - 40.00 111.25 79.25 63.50 63.50 14.00 96.25 2.25 5.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -716,7 +716,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi 
%xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 @@ -728,7 +728,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s index 57565547002af..1c4939e32c3b1 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1127,9 +1127,9 @@ vzeroupper # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# 
CHECK-NEXT: - 126.00 317.42 196.42 173.17 173.17 34.00 337.92 6.25 12.67 +# CHECK-NEXT: - 126.00 318.58 196.58 173.17 173.17 34.00 337.58 6.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1836,9 +1836,9 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm2 @@ -1852,8 +1852,8 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s 
b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s index aad08ede8c13d..c7b8c4b78da98 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s @@ -431,7 +431,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 40.00 103.58 82.58 63.50 63.50 14.00 97.58 2.25 5.00 +# CHECK-NEXT: - 40.00 104.25 83.25 63.50 63.50 14.00 96.25 2.25 5.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -716,8 +716,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2 @@ -728,8 +728,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %rcx -# 
CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2 From 7c05f092c96f0db0956a59b2beac482f9bf03ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Sat, 5 Nov 2022 16:11:13 +0000 Subject: [PATCH 326/516] [NFC] Refactor DAGCombiner::foldSelectOfConstants to reduce nesting --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 160 +++++++++--------- 1 file changed, 81 insertions(+), 79 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7d139fce7f758..ef32cfae26eae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10380,101 +10380,103 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { if (!C1 || !C2) return SDValue(); + if (CondVT != MVT::i1 || LegalOperations) { + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. 
+ if (CondVT.isInteger() && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == + TargetLowering::ZeroOrOneBooleanContent && + C1->isZero() && C2->isOne()) { + SDValue NotCond = + DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); + if (VT.bitsEq(CondVT)) + return NotCond; + return DAG.getZExtOrTrunc(NotCond, DL, VT); + } + + return SDValue(); + } + // Only do this before legalization to avoid conflicting with target-specific // transforms in the other direction (create a select from a zext/sext). There // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. - if (CondVT == MVT::i1 && !LegalOperations) { - // select Cond, 1, 0 --> zext (Cond) - if (C1->isOne() && C2->isZero()) - return DAG.getZExtOrTrunc(Cond, DL, VT); + assert(CondVT == MVT::i1 && !LegalOperations); - // select Cond, -1, 0 --> sext (Cond) - if (C1->isAllOnes() && C2->isZero()) - return DAG.getSExtOrTrunc(Cond, DL, VT); + // select Cond, 1, 0 --> zext (Cond) + if (C1->isOne() && C2->isZero()) + return DAG.getZExtOrTrunc(Cond, DL, VT); - // select Cond, 0, 1 --> zext (!Cond) - if (C1->isZero() && C2->isOne()) { - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT); - return NotCond; - } + // select Cond, -1, 0 --> sext (Cond) + if (C1->isAllOnes() && C2->isZero()) + return DAG.getSExtOrTrunc(Cond, DL, VT); - // select Cond, 0, -1 --> sext (!Cond) - if (C1->isZero() && C2->isAllOnes()) { - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); - return NotCond; - } - - // Use a target hook because some targets may prefer to transform in the - // other direction. 
- if (shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) { - // For any constants that differ by 1, we can transform the select into - // an extend and add. - const APInt &C1Val = C1->getAPIntValue(); - const APInt &C2Val = C2->getAPIntValue(); + // select Cond, 0, 1 --> zext (!Cond) + if (C1->isZero() && C2->isOne()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT); + return NotCond; + } - // select Cond, C1, C1-1 --> add (zext Cond), C1-1 - if (C1Val - 1 == C2Val) { - Cond = DAG.getZExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } + // select Cond, 0, -1 --> sext (!Cond) + if (C1->isZero() && C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return NotCond; + } - // select Cond, C1, C1+1 --> add (sext Cond), C1+1 - if (C1Val + 1 == C2Val) { - Cond = DAG.getSExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } + // Use a target hook because some targets may prefer to transform in the + // other direction. + if (shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) { + // For any constants that differ by 1, we can transform the select into + // an extend and add. 
+ const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); - // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isZero()) { - Cond = DAG.getZExtOrTrunc(Cond, DL, VT); - SDValue ShAmtC = - DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); - return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); - } + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (C1Val - 1 == C2Val) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } - // select Cond, -1, C --> or (sext Cond), C - if (C1->isAllOnes()) { - Cond = DAG.getSExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, Cond, N2); - } + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (C1Val + 1 == C2Val) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } - // select Cond, C, -1 --> or (sext (not Cond)), C - if (C2->isAllOnes()) { - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); - } + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isZero()) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = + DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } - if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) - return V; + // select Cond, -1, C --> or (sext Cond), C + if (C1->isAllOnes()) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Cond, N2); } - return SDValue(); - } + // select Cond, C, -1 --> or (sext (not Cond)), C + if (C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); + } - // fold (select Cond, 0, 1) -> (xor Cond, 1) - // We can't do this reliably if integer based 
booleans have different contents - // to floating point based booleans. This is because we can't tell whether we - // have an integer-based boolean or a floating-point-based boolean unless we - // can find the SETCC that produced it and inspect its operands. This is - // fairly easy if C is the SETCC node, but it can potentially be - // undiscoverable (or not reasonably discoverable). For example, it could be - // in another basic block or it could require searching a complicated - // expression. - if (CondVT.isInteger() && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == - TargetLowering::ZeroOrOneBooleanContent && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == - TargetLowering::ZeroOrOneBooleanContent && - C1->isZero() && C2->isOne()) { - SDValue NotCond = - DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); - if (VT.bitsEq(CondVT)) - return NotCond; - return DAG.getZExtOrTrunc(NotCond, DL, VT); + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; } return SDValue(); From a41cb8bf58aec8d8bdc2134267d68267886ef9f4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 5 Nov 2022 16:55:09 +0000 Subject: [PATCH 327/516] [SimpleLoopUnswitch] Forget block & loop dispos during trivial unswitch. Unswitching adjusts the CFG in ways that may invalidate cached loop dispositions. Clear all cached block and loop dispositions during trivial unswitching. The same is already done for non-trivial unswitching. Fixes #58751. 
--- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 1 + .../invalidate-block-and-loop-dispositions.ll | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index de7e0500a97ca..a434394f2cdac 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -541,6 +541,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, else // Forget the entire nest as this exits the entire nest. SE->forgetTopmostLoop(&L); + SE->forgetBlockAndLoopDispositions(); } if (MSSAU && VerifyMemorySSA) diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll b/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll index c7ce403f808eb..fcef88667449f 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll @@ -110,3 +110,31 @@ inner: } declare i16 @bar() + +define void @pr58751(i16 %a, ptr %dst) { +entry: + %c.1 = icmp eq i16 %a, 0 + br label %outer.header + +outer.header: + %outer.iv = phi i16 [ %a, %entry ], [ %outer.iv.next, %outer.latch ] + br label %inner.header + +inner.header: + %inner.iv = phi i16 [ %outer.iv, %outer.header ], [ %inner.iv.next, %inner.latch ] + br i1 %c.1, label %outer.latch, label %inner.latch + +inner.latch: + %inner.iv.next = add nsw i16 %inner.iv, 1 + store i16 %inner.iv.next, ptr %dst, align 1 + %c.2 = icmp eq i16 %inner.iv.next, 0 + br i1 %c.2, label %exit, label %inner.header + +outer.latch: + %outer.iv.next = add nsw i16 %outer.iv, 1 + br label %outer.header + +exit: + ret void +} + From 82209fd96e456d22c10905fd3d5f6946be3e1b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Sat, 5 Nov 2022 16:44:42 +0000 Subject: [PATCH 328/516] [NFC] Refactor 
DAGCombiner::foldSelectOfConstants to reduce nesting 2.0 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 71 ++++++++++--------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ef32cfae26eae..0112a401a8ea6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10436,49 +10436,50 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // Use a target hook because some targets may prefer to transform in the // other direction. - if (shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) { - // For any constants that differ by 1, we can transform the select into - // an extend and add. - const APInt &C1Val = C1->getAPIntValue(); - const APInt &C2Val = C2->getAPIntValue(); + if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) + return SDValue(); - // select Cond, C1, C1-1 --> add (zext Cond), C1-1 - if (C1Val - 1 == C2Val) { - Cond = DAG.getZExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } + // For any constants that differ by 1, we can transform the select into + // an extend and add. 
+ const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); - // select Cond, C1, C1+1 --> add (sext Cond), C1+1 - if (C1Val + 1 == C2Val) { - Cond = DAG.getSExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (C1Val - 1 == C2Val) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } - // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isZero()) { - Cond = DAG.getZExtOrTrunc(Cond, DL, VT); - SDValue ShAmtC = - DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); - return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); - } + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (C1Val + 1 == C2Val) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } - // select Cond, -1, C --> or (sext Cond), C - if (C1->isAllOnes()) { - Cond = DAG.getSExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, Cond, N2); - } + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isZero()) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = + DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } - // select Cond, C, -1 --> or (sext (not Cond)), C - if (C2->isAllOnes()) { - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); - } + // select Cond, -1, C --> or (sext Cond), C + if (C1->isAllOnes()) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Cond, N2); + } - if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) - return V; + // select Cond, C, -1 --> or (sext (not Cond)), C + if (C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, 
VT); + return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); } + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + return SDValue(); } From 23ba5bc5289a2f7a02f7edf3e14e81c57bf2487a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 17:28:23 +0000 Subject: [PATCH 329/516] [MCA][X86] Add more avx512 cvt instructions test coverage --- .../llvm-mca/X86/Generic/resources-avx512.s | 282 ++++++++- .../llvm-mca/X86/Generic/resources-avx512vl.s | 562 +++++++++++++++++- .../X86/IceLakeServer/resources-avx512.s | 282 ++++++++- .../X86/IceLakeServer/resources-avx512vl.s | 562 +++++++++++++++++- .../X86/SkylakeServer/resources-avx512.s | 282 ++++++++- .../X86/SkylakeServer/resources-avx512vl.s | 562 +++++++++++++++++- 6 files changed, 2526 insertions(+), 6 deletions(-) diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s index e500f8a91f6a3..9282df15b5f0b 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -127,6 +127,56 @@ vcvtdq2ps %zmm16, %zmm19 {z}{k1} vcvtdq2ps (%rax), %zmm19 {z}{k1} vcvtdq2ps (%rax){1to16}, %zmm19 {z}{k1} +vcvtpd2dq %zmm16, %ymm19 +vcvtpd2dq (%rax), %ymm19 +vcvtpd2dq (%rax){1to8}, %ymm19 +vcvtpd2dq %zmm16, %ymm19 {k1} +vcvtpd2dq (%rax), %ymm19 {k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {k1} +vcvtpd2dq %zmm16, %ymm19 {z}{k1} +vcvtpd2dq (%rax), %ymm19 {z}{k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2udq %zmm16, %ymm19 +vcvtpd2udq (%rax), %ymm19 +vcvtpd2udq (%rax){1to8}, %ymm19 +vcvtpd2udq %zmm16, %ymm19 {k1} +vcvtpd2udq (%rax), %ymm19 {k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {k1} +vcvtpd2udq %zmm16, %ymm19 {z}{k1} +vcvtpd2udq (%rax), %ymm19 {z}{k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dq %zmm16, %ymm19 +vcvttpd2dq (%rax), %ymm19 +vcvttpd2dq (%rax){1to8}, %ymm19 +vcvttpd2dq %zmm16, %ymm19 {k1} +vcvttpd2dq (%rax), %ymm19 {k1} +vcvttpd2dq (%rax){1to8}, %ymm19 
{k1} +vcvttpd2dq %zmm16, %ymm19 {z}{k1} +vcvttpd2dq (%rax), %ymm19 {z}{k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udq %zmm16, %ymm19 +vcvttpd2udq (%rax), %ymm19 +vcvttpd2udq (%rax){1to8}, %ymm19 +vcvttpd2udq %zmm16, %ymm19 {k1} +vcvttpd2udq (%rax), %ymm19 {k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {k1} +vcvttpd2udq %zmm16, %ymm19 {z}{k1} +vcvttpd2udq (%rax), %ymm19 {z}{k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2ps %zmm16, %ymm19 +vcvtpd2ps (%rax), %ymm19 +vcvtpd2ps (%rax){1to8}, %ymm19 +vcvtpd2ps %zmm16, %ymm19 {k1} +vcvtpd2ps (%rax), %ymm19 {k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {k1} +vcvtpd2ps %zmm16, %ymm19 {z}{k1} +vcvtpd2ps (%rax), %ymm19 {z}{k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {z}{k1} + vcvtps2dq %zmm16, %zmm19 vcvtps2dq (%rax), %zmm19 vcvtps2dq (%rax){1to16}, %zmm19 @@ -147,6 +197,56 @@ vcvttps2dq %zmm16, %zmm19 {z}{k1} vcvttps2dq (%rax), %zmm19 {z}{k1} vcvttps2dq (%rax){1to16}, %zmm19 {z}{k1} +vcvtps2pd %ymm16, %zmm19 +vcvtps2pd (%rax), %zmm19 +vcvtps2pd (%rax){1to8}, %zmm19 +vcvtps2pd %ymm16, %zmm19 {k1} +vcvtps2pd (%rax), %zmm19 {k1} +vcvtps2pd (%rax){1to8}, %zmm19 {k1} +vcvtps2pd %ymm16, %zmm19 {z}{k1} +vcvtps2pd (%rax), %zmm19 {z}{k1} +vcvtps2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtps2udq %zmm16, %zmm19 +vcvtps2udq (%rax), %zmm19 +vcvtps2udq (%rax){1to16}, %zmm19 +vcvtps2udq %zmm16, %zmm19 {k1} +vcvtps2udq (%rax), %zmm19 {k1} +vcvtps2udq (%rax){1to16}, %zmm19 {k1} +vcvtps2udq %zmm16, %zmm19 {z}{k1} +vcvtps2udq (%rax), %zmm19 {z}{k1} +vcvtps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvttps2udq %zmm16, %zmm19 +vcvttps2udq (%rax), %zmm19 +vcvttps2udq (%rax){1to16}, %zmm19 +vcvttps2udq %zmm16, %zmm19 {k1} +vcvttps2udq (%rax), %zmm19 {k1} +vcvttps2udq (%rax){1to16}, %zmm19 {k1} +vcvttps2udq %zmm16, %zmm19 {z}{k1} +vcvttps2udq (%rax), %zmm19 {z}{k1} +vcvttps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvtudq2pd %ymm16, %zmm19 +vcvtudq2pd (%rax), %zmm19 +vcvtudq2pd (%rax){1to8}, %zmm19 +vcvtudq2pd %ymm16, %zmm19 {k1} +vcvtudq2pd (%rax), %zmm19 {k1} 
+vcvtudq2pd (%rax){1to8}, %zmm19 {k1} +vcvtudq2pd %ymm16, %zmm19 {z}{k1} +vcvtudq2pd (%rax), %zmm19 {z}{k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtudq2ps %zmm16, %zmm19 +vcvtudq2ps (%rax), %zmm19 +vcvtudq2ps (%rax){1to16}, %zmm19 +vcvtudq2ps %zmm16, %zmm19 {k1} +vcvtudq2ps (%rax), %zmm19 {k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {k1} +vcvtudq2ps %zmm16, %zmm19 {z}{k1} +vcvtudq2ps (%rax), %zmm19 {z}{k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {z}{k1} + vdivpd %zmm16, %zmm17, %zmm19 vdivpd (%rax), %zmm17, %zmm19 vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -992,6 +1092,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * 
vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax){1to16}, %zmm19 @@ -1010,6 +1155,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 +# 
CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 4 1.00 vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * 
vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 1.00 vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 3 45 44.00 vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 4 52 44.00 * vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 4 52 44.00 * vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -1667,7 +1857,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 144.67 201.67 16.00 456.67 245.50 245.50 +# CHECK-NEXT: - 1506.00 153.67 282.67 16.00 513.67 275.50 275.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1779,6 +1969,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq 
(%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 
0.50 vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to16}, %zmm19 @@ -1797,6 +2032,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 
- 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps 
(%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - 44.00 2.50 - - 0.50 - - vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - 44.00 2.50 - - 0.50 0.50 0.50 vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - 44.00 2.50 - - 0.50 0.50 0.50 vdivpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index 20dc9e2fca618..84852a2a8b156 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -178,6 +178,206 @@ vcvtdq2ps %ymm16, %ymm19 {z}{k1} vcvtdq2ps (%rax), %ymm19 {z}{k1} vcvtdq2ps (%rax){1to8}, %ymm19 {z}{k1} +vcvtpd2dqy %ymm16, %xmm19 +vcvtpd2dqy (%rax), %xmm19 +vcvtpd2dqy (%rax){1to4}, %xmm19 +vcvtpd2dqy %ymm16, %xmm19 {k1} +vcvtpd2dqy (%rax), %xmm19 {k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2dqy %ymm16, %xmm19 {z}{k1} +vcvtpd2dqy (%rax), %xmm19 {z}{k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2dqx %xmm16, %xmm19 +vcvtpd2dqx (%rax), %xmm19 +vcvtpd2dqx (%rax){1to2}, %xmm19 +vcvtpd2dqx %xmm16, %xmm19 {k1} +vcvtpd2dqx (%rax), %xmm19 {k1} +vcvtpd2dqx (%rax){1to2},%xmm19 {k1} +vcvtpd2dqx %xmm16, %xmm19 {z}{k1} +vcvtpd2dqx (%rax), %xmm19 {z}{k1} +vcvtpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2psy %ymm16, %xmm19 +vcvtpd2psy (%rax), %xmm19 +vcvtpd2psy (%rax){1to4}, %xmm19 +vcvtpd2psy %ymm16, %xmm19 {k1} +vcvtpd2psy (%rax), %xmm19 {k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {k1} +vcvtpd2psy 
%ymm16, %xmm19 {z}{k1} +vcvtpd2psy (%rax), %xmm19 {z}{k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2psx %xmm16, %xmm19 +vcvtpd2psx (%rax), %xmm19 +vcvtpd2psx (%rax){1to2}, %xmm19 +vcvtpd2psx %xmm16, %xmm19 {k1} +vcvtpd2psx (%rax), %xmm19 {k1} +vcvtpd2psx (%rax){1to2},%xmm19 {k1} +vcvtpd2psx %xmm16, %xmm19 {z}{k1} +vcvtpd2psx (%rax), %xmm19 {z}{k1} +vcvtpd2psx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2udqy %ymm16, %xmm19 +vcvtpd2udqy (%rax), %xmm19 +vcvtpd2udqy (%rax){1to4}, %xmm19 +vcvtpd2udqy %ymm16, %xmm19 {k1} +vcvtpd2udqy (%rax), %xmm19 {k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2udqy %ymm16, %xmm19 {z}{k1} +vcvtpd2udqy (%rax), %xmm19 {z}{k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2udqx %xmm16, %xmm19 +vcvtpd2udqx (%rax), %xmm19 +vcvtpd2udqx (%rax){1to2}, %xmm19 +vcvtpd2udqx %xmm16, %xmm19 {k1} +vcvtpd2udqx (%rax), %xmm19 {k1} +vcvtpd2udqx (%rax){1to2},%xmm19 {k1} +vcvtpd2udqx %xmm16, %xmm19 {z}{k1} +vcvtpd2udqx (%rax), %xmm19 {z}{k1} +vcvtpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2dq %xmm16, %xmm19 +vcvtps2dq (%rax), %xmm19 +vcvtps2dq (%rax){1to4}, %xmm19 +vcvtps2dq %xmm16, %xmm19 {k1} +vcvtps2dq (%rax), %xmm19 {k1} +vcvtps2dq (%rax){1to4},%xmm19 {k1} +vcvtps2dq %xmm16, %xmm19 {z}{k1} +vcvtps2dq (%rax), %xmm19 {z}{k1} +vcvtps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2dq %ymm16, %ymm19 +vcvtps2dq (%rax), %ymm19 +vcvtps2dq (%rax){1to8}, %ymm19 +vcvtps2dq %ymm16,%ymm19 {k1} +vcvtps2dq (%rax),%ymm19 {k1} +vcvtps2dq (%rax){1to8}, %ymm19 {k1} +vcvtps2dq %ymm16, %ymm19 {z}{k1} +vcvtps2dq (%rax), %ymm19 {z}{k1} +vcvtps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtps2pd %xmm16, %xmm19 +vcvtps2pd (%rax), %xmm19 +vcvtps2pd (%rax){1to2}, %xmm19 +vcvtps2pd %xmm16, %xmm19 {k1} +vcvtps2pd (%rax), %xmm19 {k1} +vcvtps2pd (%rax){1to2},%xmm19 {k1} +vcvtps2pd %xmm16, %xmm19 {z}{k1} +vcvtps2pd (%rax), %xmm19 {z}{k1} +vcvtps2pd (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2pd %xmm16, %ymm19 +vcvtps2pd (%rax), %ymm19 +vcvtps2pd (%rax){1to4}, %ymm19 +vcvtps2pd 
%xmm16,%ymm19 {k1} +vcvtps2pd (%rax),%ymm19 {k1} +vcvtps2pd (%rax){1to4}, %ymm19 {k1} +vcvtps2pd %xmm16, %ymm19 {z}{k1} +vcvtps2pd (%rax), %ymm19 {z}{k1} +vcvtps2pd (%rax){1to4}, %ymm19 {z}{k1} + +vcvtps2udq %xmm16, %xmm19 +vcvtps2udq (%rax), %xmm19 +vcvtps2udq (%rax){1to4}, %xmm19 +vcvtps2udq %xmm16, %xmm19 {k1} +vcvtps2udq (%rax), %xmm19 {k1} +vcvtps2udq (%rax){1to4},%xmm19 {k1} +vcvtps2udq %xmm16, %xmm19 {z}{k1} +vcvtps2udq (%rax), %xmm19 {z}{k1} +vcvtps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2udq %ymm16, %ymm19 +vcvtps2udq (%rax), %ymm19 +vcvtps2udq (%rax){1to8}, %ymm19 +vcvtps2udq %ymm16,%ymm19 {k1} +vcvtps2udq (%rax),%ymm19 {k1} +vcvtps2udq (%rax){1to8}, %ymm19 {k1} +vcvtps2udq %ymm16, %ymm19 {z}{k1} +vcvtps2udq (%rax), %ymm19 {z}{k1} +vcvtps2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dqy %ymm16, %xmm19 +vcvttpd2dqy (%rax), %xmm19 +vcvttpd2dqy (%rax){1to4}, %xmm19 +vcvttpd2dqy %ymm16, %xmm19 {k1} +vcvttpd2dqy (%rax), %xmm19 {k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2dqy %ymm16, %xmm19 {z}{k1} +vcvttpd2dqy (%rax), %xmm19 {z}{k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2dqx %xmm16, %xmm19 +vcvttpd2dqx (%rax), %xmm19 +vcvttpd2dqx (%rax){1to2}, %xmm19 +vcvttpd2dqx %xmm16, %xmm19 {k1} +vcvttpd2dqx (%rax), %xmm19 {k1} +vcvttpd2dqx (%rax){1to2},%xmm19 {k1} +vcvttpd2dqx %xmm16, %xmm19 {z}{k1} +vcvttpd2dqx (%rax), %xmm19 {z}{k1} +vcvttpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2dq %xmm16, %xmm19 +vcvttps2dq (%rax), %xmm19 +vcvttps2dq (%rax){1to4}, %xmm19 +vcvttps2dq %xmm16, %xmm19 {k1} +vcvttps2dq (%rax), %xmm19 {k1} +vcvttps2dq (%rax){1to4},%xmm19 {k1} +vcvttps2dq %xmm16, %xmm19 {z}{k1} +vcvttps2dq (%rax), %xmm19 {z}{k1} +vcvttps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2dq %ymm16, %ymm19 +vcvttps2dq (%rax), %ymm19 +vcvttps2dq (%rax){1to8}, %ymm19 +vcvttps2dq %ymm16,%ymm19 {k1} +vcvttps2dq (%rax),%ymm19 {k1} +vcvttps2dq (%rax){1to8}, %ymm19 {k1} +vcvttps2dq %ymm16, %ymm19 {z}{k1} +vcvttps2dq (%rax), %ymm19 {z}{k1} +vcvttps2dq 
(%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udqy %ymm16, %xmm19 +vcvttpd2udqy (%rax), %xmm19 +vcvttpd2udqy (%rax){1to4}, %xmm19 +vcvttpd2udqy %ymm16, %xmm19 {k1} +vcvttpd2udqy (%rax), %xmm19 {k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2udqy %ymm16, %xmm19 {z}{k1} +vcvttpd2udqy (%rax), %xmm19 {z}{k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2udqx %xmm16, %xmm19 +vcvttpd2udqx (%rax), %xmm19 +vcvttpd2udqx (%rax){1to2}, %xmm19 +vcvttpd2udqx %xmm16, %xmm19 {k1} +vcvttpd2udqx (%rax), %xmm19 {k1} +vcvttpd2udqx (%rax){1to2},%xmm19 {k1} +vcvttpd2udqx %xmm16, %xmm19 {z}{k1} +vcvttpd2udqx (%rax), %xmm19 {z}{k1} +vcvttpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2udq %xmm16, %xmm19 +vcvttps2udq (%rax), %xmm19 +vcvttps2udq (%rax){1to4}, %xmm19 +vcvttps2udq %xmm16, %xmm19 {k1} +vcvttps2udq (%rax), %xmm19 {k1} +vcvttps2udq (%rax){1to4},%xmm19 {k1} +vcvttps2udq %xmm16, %xmm19 {z}{k1} +vcvttps2udq (%rax), %xmm19 {z}{k1} +vcvttps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2udq %ymm16, %ymm19 +vcvttps2udq (%rax), %ymm19 +vcvttps2udq (%rax){1to8}, %ymm19 +vcvttps2udq %ymm16,%ymm19 {k1} +vcvttps2udq (%rax),%ymm19 {k1} +vcvttps2udq (%rax){1to8}, %ymm19 {k1} +vcvttps2udq %ymm16, %ymm19 {z}{k1} +vcvttps2udq (%rax), %ymm19 {z}{k1} +vcvttps2udq (%rax){1to8}, %ymm19 {z}{k1} + vdivpd %xmm16, %xmm17, %xmm19 vdivpd (%rax), %xmm17, %xmm19 vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -1545,6 +1745,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 
1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 
* vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 2 7 1.00 * 
vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 
vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 
1.00 * vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * 
vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 22 22.00 vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 28 22.00 * vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 28 22.00 * vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -2620,7 +3000,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 206.00 363.50 32.00 642.50 390.50 390.50 +# CHECK-NEXT: - 1935.00 224.00 525.50 32.00 738.50 450.50 450.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2780,6 +3160,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 
vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 
vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to8}, %ymm19 +# 
CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - 
- - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 
vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqy (%rax), %xmm19 {%k1} +# 
CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %ymm19 {%k1} +# 
CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 22.00 1.00 - - - - - vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 vdivpd (%rax){1to2}, %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index 13327794d2b4e..eb370576f2c13 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -127,6 +127,56 @@ vcvtdq2ps %zmm16, %zmm19 {z}{k1} vcvtdq2ps (%rax), %zmm19 {z}{k1} vcvtdq2ps (%rax){1to16}, %zmm19 {z}{k1} +vcvtpd2dq %zmm16, %ymm19 +vcvtpd2dq (%rax), %ymm19 +vcvtpd2dq (%rax){1to8}, %ymm19 +vcvtpd2dq %zmm16, %ymm19 {k1} +vcvtpd2dq (%rax), %ymm19 {k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {k1} +vcvtpd2dq %zmm16, %ymm19 {z}{k1} +vcvtpd2dq (%rax), %ymm19 {z}{k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2udq %zmm16, %ymm19 +vcvtpd2udq (%rax), %ymm19 +vcvtpd2udq (%rax){1to8}, %ymm19 +vcvtpd2udq %zmm16, %ymm19 {k1} +vcvtpd2udq (%rax), %ymm19 {k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {k1} +vcvtpd2udq %zmm16, %ymm19 {z}{k1} +vcvtpd2udq (%rax), %ymm19 {z}{k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dq %zmm16, %ymm19 +vcvttpd2dq (%rax), %ymm19 +vcvttpd2dq (%rax){1to8}, %ymm19 +vcvttpd2dq %zmm16, %ymm19 {k1} +vcvttpd2dq (%rax), %ymm19 {k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {k1} +vcvttpd2dq %zmm16, %ymm19 {z}{k1} +vcvttpd2dq (%rax), %ymm19 {z}{k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udq %zmm16, %ymm19 +vcvttpd2udq (%rax), %ymm19 +vcvttpd2udq (%rax){1to8}, %ymm19 +vcvttpd2udq %zmm16, %ymm19 {k1} 
+vcvttpd2udq (%rax), %ymm19 {k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {k1} +vcvttpd2udq %zmm16, %ymm19 {z}{k1} +vcvttpd2udq (%rax), %ymm19 {z}{k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2ps %zmm16, %ymm19 +vcvtpd2ps (%rax), %ymm19 +vcvtpd2ps (%rax){1to8}, %ymm19 +vcvtpd2ps %zmm16, %ymm19 {k1} +vcvtpd2ps (%rax), %ymm19 {k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {k1} +vcvtpd2ps %zmm16, %ymm19 {z}{k1} +vcvtpd2ps (%rax), %ymm19 {z}{k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {z}{k1} + vcvtps2dq %zmm16, %zmm19 vcvtps2dq (%rax), %zmm19 vcvtps2dq (%rax){1to16}, %zmm19 @@ -147,6 +197,56 @@ vcvttps2dq %zmm16, %zmm19 {z}{k1} vcvttps2dq (%rax), %zmm19 {z}{k1} vcvttps2dq (%rax){1to16}, %zmm19 {z}{k1} +vcvtps2pd %ymm16, %zmm19 +vcvtps2pd (%rax), %zmm19 +vcvtps2pd (%rax){1to8}, %zmm19 +vcvtps2pd %ymm16, %zmm19 {k1} +vcvtps2pd (%rax), %zmm19 {k1} +vcvtps2pd (%rax){1to8}, %zmm19 {k1} +vcvtps2pd %ymm16, %zmm19 {z}{k1} +vcvtps2pd (%rax), %zmm19 {z}{k1} +vcvtps2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtps2udq %zmm16, %zmm19 +vcvtps2udq (%rax), %zmm19 +vcvtps2udq (%rax){1to16}, %zmm19 +vcvtps2udq %zmm16, %zmm19 {k1} +vcvtps2udq (%rax), %zmm19 {k1} +vcvtps2udq (%rax){1to16}, %zmm19 {k1} +vcvtps2udq %zmm16, %zmm19 {z}{k1} +vcvtps2udq (%rax), %zmm19 {z}{k1} +vcvtps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvttps2udq %zmm16, %zmm19 +vcvttps2udq (%rax), %zmm19 +vcvttps2udq (%rax){1to16}, %zmm19 +vcvttps2udq %zmm16, %zmm19 {k1} +vcvttps2udq (%rax), %zmm19 {k1} +vcvttps2udq (%rax){1to16}, %zmm19 {k1} +vcvttps2udq %zmm16, %zmm19 {z}{k1} +vcvttps2udq (%rax), %zmm19 {z}{k1} +vcvttps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvtudq2pd %ymm16, %zmm19 +vcvtudq2pd (%rax), %zmm19 +vcvtudq2pd (%rax){1to8}, %zmm19 +vcvtudq2pd %ymm16, %zmm19 {k1} +vcvtudq2pd (%rax), %zmm19 {k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {k1} +vcvtudq2pd %ymm16, %zmm19 {z}{k1} +vcvtudq2pd (%rax), %zmm19 {z}{k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtudq2ps %zmm16, %zmm19 +vcvtudq2ps (%rax), %zmm19 +vcvtudq2ps (%rax){1to16}, %zmm19 
+vcvtudq2ps %zmm16, %zmm19 {k1} +vcvtudq2ps (%rax), %zmm19 {k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {k1} +vcvtudq2ps %zmm16, %zmm19 {z}{k1} +vcvtudq2ps (%rax), %zmm19 {z}{k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {z}{k1} + vdivpd %zmm16, %zmm17, %zmm19 vdivpd (%rax), %zmm17, %zmm19 vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -992,6 +1092,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 
vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to16}, %zmm19 @@ -1010,6 +1155,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 0.50 vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# 
CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 0.50 vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 4 0.50 
vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 3 23 16.00 vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -1671,7 +1861,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33 - - +# CHECK-NEXT: - 612.00 282.17 67.67 308.83 308.83 16.00 631.17 2.00 5.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1783,6 +1973,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 
0.50 - 1.33 - - - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 
- - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to16}, %zmm19 @@ -1801,6 +2036,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 
0.33 - - - - vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtudq2pd %ymm16, 
%zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - 16.00 2.00 - - - - 1.00 - - - - vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s index 39c8b9921dbe7..8ee8f9e6c72f3 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s +++ 
b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s @@ -178,6 +178,206 @@ vcvtdq2ps %ymm16, %ymm19 {z}{k1} vcvtdq2ps (%rax), %ymm19 {z}{k1} vcvtdq2ps (%rax){1to8}, %ymm19 {z}{k1} +vcvtpd2dqy %ymm16, %xmm19 +vcvtpd2dqy (%rax), %xmm19 +vcvtpd2dqy (%rax){1to4}, %xmm19 +vcvtpd2dqy %ymm16, %xmm19 {k1} +vcvtpd2dqy (%rax), %xmm19 {k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2dqy %ymm16, %xmm19 {z}{k1} +vcvtpd2dqy (%rax), %xmm19 {z}{k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2dqx %xmm16, %xmm19 +vcvtpd2dqx (%rax), %xmm19 +vcvtpd2dqx (%rax){1to2}, %xmm19 +vcvtpd2dqx %xmm16, %xmm19 {k1} +vcvtpd2dqx (%rax), %xmm19 {k1} +vcvtpd2dqx (%rax){1to2},%xmm19 {k1} +vcvtpd2dqx %xmm16, %xmm19 {z}{k1} +vcvtpd2dqx (%rax), %xmm19 {z}{k1} +vcvtpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2psy %ymm16, %xmm19 +vcvtpd2psy (%rax), %xmm19 +vcvtpd2psy (%rax){1to4}, %xmm19 +vcvtpd2psy %ymm16, %xmm19 {k1} +vcvtpd2psy (%rax), %xmm19 {k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {k1} +vcvtpd2psy %ymm16, %xmm19 {z}{k1} +vcvtpd2psy (%rax), %xmm19 {z}{k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2psx %xmm16, %xmm19 +vcvtpd2psx (%rax), %xmm19 +vcvtpd2psx (%rax){1to2}, %xmm19 +vcvtpd2psx %xmm16, %xmm19 {k1} +vcvtpd2psx (%rax), %xmm19 {k1} +vcvtpd2psx (%rax){1to2},%xmm19 {k1} +vcvtpd2psx %xmm16, %xmm19 {z}{k1} +vcvtpd2psx (%rax), %xmm19 {z}{k1} +vcvtpd2psx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2udqy %ymm16, %xmm19 +vcvtpd2udqy (%rax), %xmm19 +vcvtpd2udqy (%rax){1to4}, %xmm19 +vcvtpd2udqy %ymm16, %xmm19 {k1} +vcvtpd2udqy (%rax), %xmm19 {k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2udqy %ymm16, %xmm19 {z}{k1} +vcvtpd2udqy (%rax), %xmm19 {z}{k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2udqx %xmm16, %xmm19 +vcvtpd2udqx (%rax), %xmm19 +vcvtpd2udqx (%rax){1to2}, %xmm19 +vcvtpd2udqx %xmm16, %xmm19 {k1} +vcvtpd2udqx (%rax), %xmm19 {k1} +vcvtpd2udqx (%rax){1to2},%xmm19 {k1} +vcvtpd2udqx %xmm16, %xmm19 {z}{k1} +vcvtpd2udqx (%rax), %xmm19 {z}{k1} +vcvtpd2udqx 
(%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2dq %xmm16, %xmm19 +vcvtps2dq (%rax), %xmm19 +vcvtps2dq (%rax){1to4}, %xmm19 +vcvtps2dq %xmm16, %xmm19 {k1} +vcvtps2dq (%rax), %xmm19 {k1} +vcvtps2dq (%rax){1to4},%xmm19 {k1} +vcvtps2dq %xmm16, %xmm19 {z}{k1} +vcvtps2dq (%rax), %xmm19 {z}{k1} +vcvtps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2dq %ymm16, %ymm19 +vcvtps2dq (%rax), %ymm19 +vcvtps2dq (%rax){1to8}, %ymm19 +vcvtps2dq %ymm16,%ymm19 {k1} +vcvtps2dq (%rax),%ymm19 {k1} +vcvtps2dq (%rax){1to8}, %ymm19 {k1} +vcvtps2dq %ymm16, %ymm19 {z}{k1} +vcvtps2dq (%rax), %ymm19 {z}{k1} +vcvtps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtps2pd %xmm16, %xmm19 +vcvtps2pd (%rax), %xmm19 +vcvtps2pd (%rax){1to2}, %xmm19 +vcvtps2pd %xmm16, %xmm19 {k1} +vcvtps2pd (%rax), %xmm19 {k1} +vcvtps2pd (%rax){1to2},%xmm19 {k1} +vcvtps2pd %xmm16, %xmm19 {z}{k1} +vcvtps2pd (%rax), %xmm19 {z}{k1} +vcvtps2pd (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2pd %xmm16, %ymm19 +vcvtps2pd (%rax), %ymm19 +vcvtps2pd (%rax){1to4}, %ymm19 +vcvtps2pd %xmm16,%ymm19 {k1} +vcvtps2pd (%rax),%ymm19 {k1} +vcvtps2pd (%rax){1to4}, %ymm19 {k1} +vcvtps2pd %xmm16, %ymm19 {z}{k1} +vcvtps2pd (%rax), %ymm19 {z}{k1} +vcvtps2pd (%rax){1to4}, %ymm19 {z}{k1} + +vcvtps2udq %xmm16, %xmm19 +vcvtps2udq (%rax), %xmm19 +vcvtps2udq (%rax){1to4}, %xmm19 +vcvtps2udq %xmm16, %xmm19 {k1} +vcvtps2udq (%rax), %xmm19 {k1} +vcvtps2udq (%rax){1to4},%xmm19 {k1} +vcvtps2udq %xmm16, %xmm19 {z}{k1} +vcvtps2udq (%rax), %xmm19 {z}{k1} +vcvtps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2udq %ymm16, %ymm19 +vcvtps2udq (%rax), %ymm19 +vcvtps2udq (%rax){1to8}, %ymm19 +vcvtps2udq %ymm16,%ymm19 {k1} +vcvtps2udq (%rax),%ymm19 {k1} +vcvtps2udq (%rax){1to8}, %ymm19 {k1} +vcvtps2udq %ymm16, %ymm19 {z}{k1} +vcvtps2udq (%rax), %ymm19 {z}{k1} +vcvtps2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dqy %ymm16, %xmm19 +vcvttpd2dqy (%rax), %xmm19 +vcvttpd2dqy (%rax){1to4}, %xmm19 +vcvttpd2dqy %ymm16, %xmm19 {k1} +vcvttpd2dqy (%rax), %xmm19 {k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {k1} 
+vcvttpd2dqy %ymm16, %xmm19 {z}{k1} +vcvttpd2dqy (%rax), %xmm19 {z}{k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2dqx %xmm16, %xmm19 +vcvttpd2dqx (%rax), %xmm19 +vcvttpd2dqx (%rax){1to2}, %xmm19 +vcvttpd2dqx %xmm16, %xmm19 {k1} +vcvttpd2dqx (%rax), %xmm19 {k1} +vcvttpd2dqx (%rax){1to2},%xmm19 {k1} +vcvttpd2dqx %xmm16, %xmm19 {z}{k1} +vcvttpd2dqx (%rax), %xmm19 {z}{k1} +vcvttpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2dq %xmm16, %xmm19 +vcvttps2dq (%rax), %xmm19 +vcvttps2dq (%rax){1to4}, %xmm19 +vcvttps2dq %xmm16, %xmm19 {k1} +vcvttps2dq (%rax), %xmm19 {k1} +vcvttps2dq (%rax){1to4},%xmm19 {k1} +vcvttps2dq %xmm16, %xmm19 {z}{k1} +vcvttps2dq (%rax), %xmm19 {z}{k1} +vcvttps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2dq %ymm16, %ymm19 +vcvttps2dq (%rax), %ymm19 +vcvttps2dq (%rax){1to8}, %ymm19 +vcvttps2dq %ymm16,%ymm19 {k1} +vcvttps2dq (%rax),%ymm19 {k1} +vcvttps2dq (%rax){1to8}, %ymm19 {k1} +vcvttps2dq %ymm16, %ymm19 {z}{k1} +vcvttps2dq (%rax), %ymm19 {z}{k1} +vcvttps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udqy %ymm16, %xmm19 +vcvttpd2udqy (%rax), %xmm19 +vcvttpd2udqy (%rax){1to4}, %xmm19 +vcvttpd2udqy %ymm16, %xmm19 {k1} +vcvttpd2udqy (%rax), %xmm19 {k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2udqy %ymm16, %xmm19 {z}{k1} +vcvttpd2udqy (%rax), %xmm19 {z}{k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2udqx %xmm16, %xmm19 +vcvttpd2udqx (%rax), %xmm19 +vcvttpd2udqx (%rax){1to2}, %xmm19 +vcvttpd2udqx %xmm16, %xmm19 {k1} +vcvttpd2udqx (%rax), %xmm19 {k1} +vcvttpd2udqx (%rax){1to2},%xmm19 {k1} +vcvttpd2udqx %xmm16, %xmm19 {z}{k1} +vcvttpd2udqx (%rax), %xmm19 {z}{k1} +vcvttpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2udq %xmm16, %xmm19 +vcvttps2udq (%rax), %xmm19 +vcvttps2udq (%rax){1to4}, %xmm19 +vcvttps2udq %xmm16, %xmm19 {k1} +vcvttps2udq (%rax), %xmm19 {k1} +vcvttps2udq (%rax){1to4},%xmm19 {k1} +vcvttps2udq %xmm16, %xmm19 {z}{k1} +vcvttps2udq (%rax), %xmm19 {z}{k1} +vcvttps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2udq %ymm16, 
%ymm19 +vcvttps2udq (%rax), %ymm19 +vcvttps2udq (%rax){1to8}, %ymm19 +vcvttps2udq %ymm16,%ymm19 {k1} +vcvttps2udq (%rax),%ymm19 {k1} +vcvttps2udq (%rax){1to8}, %ymm19 {k1} +vcvttps2udq %ymm16, %ymm19 {z}{k1} +vcvttps2udq (%rax), %ymm19 {z}{k1} +vcvttps2udq (%rax){1to8}, %ymm19 {z}{k1} + vdivpd %xmm16, %xmm17, %xmm19 vdivpd (%rax), %xmm17, %xmm19 vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -1545,6 +1745,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps 
(%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq 
%xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq 
(%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * 
vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# 
CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 14 4.00 vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -2624,7 +3004,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 423.00 289.33 264.33 407.17 407.17 32.00 684.33 4.00 10.67 - - +# CHECK-NEXT: - 423.00 364.33 339.33 467.17 467.17 32.00 762.33 4.00 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2784,6 +3164,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.50 
0.50 - - - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 
1.00 - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} 
{z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# 
CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq 
(%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 
0.50 - - - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 
0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), 
%xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 4.00 1.00 - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - - - vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - - - vdivpd (%rax){1to2}, %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index 127be91c0deae..2420edc5b7080 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -127,6 +127,56 @@ vcvtdq2ps %zmm16, %zmm19 
{z}{k1} vcvtdq2ps (%rax), %zmm19 {z}{k1} vcvtdq2ps (%rax){1to16}, %zmm19 {z}{k1} +vcvtpd2dq %zmm16, %ymm19 +vcvtpd2dq (%rax), %ymm19 +vcvtpd2dq (%rax){1to8}, %ymm19 +vcvtpd2dq %zmm16, %ymm19 {k1} +vcvtpd2dq (%rax), %ymm19 {k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {k1} +vcvtpd2dq %zmm16, %ymm19 {z}{k1} +vcvtpd2dq (%rax), %ymm19 {z}{k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2udq %zmm16, %ymm19 +vcvtpd2udq (%rax), %ymm19 +vcvtpd2udq (%rax){1to8}, %ymm19 +vcvtpd2udq %zmm16, %ymm19 {k1} +vcvtpd2udq (%rax), %ymm19 {k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {k1} +vcvtpd2udq %zmm16, %ymm19 {z}{k1} +vcvtpd2udq (%rax), %ymm19 {z}{k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dq %zmm16, %ymm19 +vcvttpd2dq (%rax), %ymm19 +vcvttpd2dq (%rax){1to8}, %ymm19 +vcvttpd2dq %zmm16, %ymm19 {k1} +vcvttpd2dq (%rax), %ymm19 {k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {k1} +vcvttpd2dq %zmm16, %ymm19 {z}{k1} +vcvttpd2dq (%rax), %ymm19 {z}{k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udq %zmm16, %ymm19 +vcvttpd2udq (%rax), %ymm19 +vcvttpd2udq (%rax){1to8}, %ymm19 +vcvttpd2udq %zmm16, %ymm19 {k1} +vcvttpd2udq (%rax), %ymm19 {k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {k1} +vcvttpd2udq %zmm16, %ymm19 {z}{k1} +vcvttpd2udq (%rax), %ymm19 {z}{k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2ps %zmm16, %ymm19 +vcvtpd2ps (%rax), %ymm19 +vcvtpd2ps (%rax){1to8}, %ymm19 +vcvtpd2ps %zmm16, %ymm19 {k1} +vcvtpd2ps (%rax), %ymm19 {k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {k1} +vcvtpd2ps %zmm16, %ymm19 {z}{k1} +vcvtpd2ps (%rax), %ymm19 {z}{k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {z}{k1} + vcvtps2dq %zmm16, %zmm19 vcvtps2dq (%rax), %zmm19 vcvtps2dq (%rax){1to16}, %zmm19 @@ -147,6 +197,56 @@ vcvttps2dq %zmm16, %zmm19 {z}{k1} vcvttps2dq (%rax), %zmm19 {z}{k1} vcvttps2dq (%rax){1to16}, %zmm19 {z}{k1} +vcvtps2pd %ymm16, %zmm19 +vcvtps2pd (%rax), %zmm19 +vcvtps2pd (%rax){1to8}, %zmm19 +vcvtps2pd %ymm16, %zmm19 {k1} +vcvtps2pd (%rax), %zmm19 {k1} +vcvtps2pd (%rax){1to8}, %zmm19 {k1} +vcvtps2pd %ymm16, 
%zmm19 {z}{k1} +vcvtps2pd (%rax), %zmm19 {z}{k1} +vcvtps2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtps2udq %zmm16, %zmm19 +vcvtps2udq (%rax), %zmm19 +vcvtps2udq (%rax){1to16}, %zmm19 +vcvtps2udq %zmm16, %zmm19 {k1} +vcvtps2udq (%rax), %zmm19 {k1} +vcvtps2udq (%rax){1to16}, %zmm19 {k1} +vcvtps2udq %zmm16, %zmm19 {z}{k1} +vcvtps2udq (%rax), %zmm19 {z}{k1} +vcvtps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvttps2udq %zmm16, %zmm19 +vcvttps2udq (%rax), %zmm19 +vcvttps2udq (%rax){1to16}, %zmm19 +vcvttps2udq %zmm16, %zmm19 {k1} +vcvttps2udq (%rax), %zmm19 {k1} +vcvttps2udq (%rax){1to16}, %zmm19 {k1} +vcvttps2udq %zmm16, %zmm19 {z}{k1} +vcvttps2udq (%rax), %zmm19 {z}{k1} +vcvttps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvtudq2pd %ymm16, %zmm19 +vcvtudq2pd (%rax), %zmm19 +vcvtudq2pd (%rax){1to8}, %zmm19 +vcvtudq2pd %ymm16, %zmm19 {k1} +vcvtudq2pd (%rax), %zmm19 {k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {k1} +vcvtudq2pd %ymm16, %zmm19 {z}{k1} +vcvtudq2pd (%rax), %zmm19 {z}{k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtudq2ps %zmm16, %zmm19 +vcvtudq2ps (%rax), %zmm19 +vcvtudq2ps (%rax){1to16}, %zmm19 +vcvtudq2ps %zmm16, %zmm19 {k1} +vcvtudq2ps (%rax), %zmm19 {k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {k1} +vcvtudq2ps %zmm16, %zmm19 {z}{k1} +vcvtudq2ps (%rax), %zmm19 {z}{k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {z}{k1} + vdivpd %zmm16, %zmm17, %zmm19 vdivpd (%rax), %zmm17, %zmm19 vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -992,6 +1092,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 
{%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps 
%zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to16}, %zmm19 @@ -1010,6 +1155,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 0.50 vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 
0.50 vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 3 23 16.00 vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -1669,7 +1859,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33 +# CHECK-NEXT: - 612.00 277.67 67.67 308.83 308.83 16.00 635.67 2.00 5.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] 
[5] [6] [7] [8] [9] Instructions: @@ -1781,6 +1971,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq 
(%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2dq 
%zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 @@ -1799,6 +2034,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2udq %zmm16, 
%zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - 
vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - 16.00 2.00 - - - - 1.00 - - vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s index 848f1a54e82dd..30cc195dcda73 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s @@ -178,6 +178,206 @@ vcvtdq2ps %ymm16, %ymm19 {z}{k1} vcvtdq2ps (%rax), %ymm19 {z}{k1} vcvtdq2ps (%rax){1to8}, %ymm19 {z}{k1} +vcvtpd2dqy %ymm16, %xmm19 +vcvtpd2dqy (%rax), %xmm19 +vcvtpd2dqy (%rax){1to4}, %xmm19 +vcvtpd2dqy %ymm16, %xmm19 {k1} +vcvtpd2dqy (%rax), %xmm19 {k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2dqy %ymm16, %xmm19 {z}{k1} +vcvtpd2dqy (%rax), %xmm19 {z}{k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2dqx %xmm16, %xmm19 +vcvtpd2dqx (%rax), %xmm19 +vcvtpd2dqx (%rax){1to2}, %xmm19 +vcvtpd2dqx %xmm16, %xmm19 {k1} +vcvtpd2dqx (%rax), %xmm19 {k1} +vcvtpd2dqx (%rax){1to2},%xmm19 {k1} +vcvtpd2dqx %xmm16, %xmm19 {z}{k1} +vcvtpd2dqx (%rax), %xmm19 {z}{k1} +vcvtpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2psy %ymm16, %xmm19 +vcvtpd2psy (%rax), %xmm19 +vcvtpd2psy (%rax){1to4}, %xmm19 +vcvtpd2psy %ymm16, %xmm19 {k1} +vcvtpd2psy (%rax), %xmm19 {k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {k1} +vcvtpd2psy %ymm16, %xmm19 {z}{k1} +vcvtpd2psy (%rax), %xmm19 {z}{k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2psx %xmm16, %xmm19 +vcvtpd2psx (%rax), %xmm19 +vcvtpd2psx (%rax){1to2}, %xmm19 +vcvtpd2psx %xmm16, %xmm19 {k1} +vcvtpd2psx (%rax), %xmm19 {k1} +vcvtpd2psx (%rax){1to2},%xmm19 {k1} +vcvtpd2psx %xmm16, %xmm19 {z}{k1} +vcvtpd2psx (%rax), %xmm19 {z}{k1} +vcvtpd2psx 
(%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2udqy %ymm16, %xmm19 +vcvtpd2udqy (%rax), %xmm19 +vcvtpd2udqy (%rax){1to4}, %xmm19 +vcvtpd2udqy %ymm16, %xmm19 {k1} +vcvtpd2udqy (%rax), %xmm19 {k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2udqy %ymm16, %xmm19 {z}{k1} +vcvtpd2udqy (%rax), %xmm19 {z}{k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2udqx %xmm16, %xmm19 +vcvtpd2udqx (%rax), %xmm19 +vcvtpd2udqx (%rax){1to2}, %xmm19 +vcvtpd2udqx %xmm16, %xmm19 {k1} +vcvtpd2udqx (%rax), %xmm19 {k1} +vcvtpd2udqx (%rax){1to2},%xmm19 {k1} +vcvtpd2udqx %xmm16, %xmm19 {z}{k1} +vcvtpd2udqx (%rax), %xmm19 {z}{k1} +vcvtpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2dq %xmm16, %xmm19 +vcvtps2dq (%rax), %xmm19 +vcvtps2dq (%rax){1to4}, %xmm19 +vcvtps2dq %xmm16, %xmm19 {k1} +vcvtps2dq (%rax), %xmm19 {k1} +vcvtps2dq (%rax){1to4},%xmm19 {k1} +vcvtps2dq %xmm16, %xmm19 {z}{k1} +vcvtps2dq (%rax), %xmm19 {z}{k1} +vcvtps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2dq %ymm16, %ymm19 +vcvtps2dq (%rax), %ymm19 +vcvtps2dq (%rax){1to8}, %ymm19 +vcvtps2dq %ymm16,%ymm19 {k1} +vcvtps2dq (%rax),%ymm19 {k1} +vcvtps2dq (%rax){1to8}, %ymm19 {k1} +vcvtps2dq %ymm16, %ymm19 {z}{k1} +vcvtps2dq (%rax), %ymm19 {z}{k1} +vcvtps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtps2pd %xmm16, %xmm19 +vcvtps2pd (%rax), %xmm19 +vcvtps2pd (%rax){1to2}, %xmm19 +vcvtps2pd %xmm16, %xmm19 {k1} +vcvtps2pd (%rax), %xmm19 {k1} +vcvtps2pd (%rax){1to2},%xmm19 {k1} +vcvtps2pd %xmm16, %xmm19 {z}{k1} +vcvtps2pd (%rax), %xmm19 {z}{k1} +vcvtps2pd (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2pd %xmm16, %ymm19 +vcvtps2pd (%rax), %ymm19 +vcvtps2pd (%rax){1to4}, %ymm19 +vcvtps2pd %xmm16,%ymm19 {k1} +vcvtps2pd (%rax),%ymm19 {k1} +vcvtps2pd (%rax){1to4}, %ymm19 {k1} +vcvtps2pd %xmm16, %ymm19 {z}{k1} +vcvtps2pd (%rax), %ymm19 {z}{k1} +vcvtps2pd (%rax){1to4}, %ymm19 {z}{k1} + +vcvtps2udq %xmm16, %xmm19 +vcvtps2udq (%rax), %xmm19 +vcvtps2udq (%rax){1to4}, %xmm19 +vcvtps2udq %xmm16, %xmm19 {k1} +vcvtps2udq (%rax), %xmm19 {k1} +vcvtps2udq 
(%rax){1to4},%xmm19 {k1} +vcvtps2udq %xmm16, %xmm19 {z}{k1} +vcvtps2udq (%rax), %xmm19 {z}{k1} +vcvtps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2udq %ymm16, %ymm19 +vcvtps2udq (%rax), %ymm19 +vcvtps2udq (%rax){1to8}, %ymm19 +vcvtps2udq %ymm16,%ymm19 {k1} +vcvtps2udq (%rax),%ymm19 {k1} +vcvtps2udq (%rax){1to8}, %ymm19 {k1} +vcvtps2udq %ymm16, %ymm19 {z}{k1} +vcvtps2udq (%rax), %ymm19 {z}{k1} +vcvtps2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dqy %ymm16, %xmm19 +vcvttpd2dqy (%rax), %xmm19 +vcvttpd2dqy (%rax){1to4}, %xmm19 +vcvttpd2dqy %ymm16, %xmm19 {k1} +vcvttpd2dqy (%rax), %xmm19 {k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2dqy %ymm16, %xmm19 {z}{k1} +vcvttpd2dqy (%rax), %xmm19 {z}{k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2dqx %xmm16, %xmm19 +vcvttpd2dqx (%rax), %xmm19 +vcvttpd2dqx (%rax){1to2}, %xmm19 +vcvttpd2dqx %xmm16, %xmm19 {k1} +vcvttpd2dqx (%rax), %xmm19 {k1} +vcvttpd2dqx (%rax){1to2},%xmm19 {k1} +vcvttpd2dqx %xmm16, %xmm19 {z}{k1} +vcvttpd2dqx (%rax), %xmm19 {z}{k1} +vcvttpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2dq %xmm16, %xmm19 +vcvttps2dq (%rax), %xmm19 +vcvttps2dq (%rax){1to4}, %xmm19 +vcvttps2dq %xmm16, %xmm19 {k1} +vcvttps2dq (%rax), %xmm19 {k1} +vcvttps2dq (%rax){1to4},%xmm19 {k1} +vcvttps2dq %xmm16, %xmm19 {z}{k1} +vcvttps2dq (%rax), %xmm19 {z}{k1} +vcvttps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2dq %ymm16, %ymm19 +vcvttps2dq (%rax), %ymm19 +vcvttps2dq (%rax){1to8}, %ymm19 +vcvttps2dq %ymm16,%ymm19 {k1} +vcvttps2dq (%rax),%ymm19 {k1} +vcvttps2dq (%rax){1to8}, %ymm19 {k1} +vcvttps2dq %ymm16, %ymm19 {z}{k1} +vcvttps2dq (%rax), %ymm19 {z}{k1} +vcvttps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udqy %ymm16, %xmm19 +vcvttpd2udqy (%rax), %xmm19 +vcvttpd2udqy (%rax){1to4}, %xmm19 +vcvttpd2udqy %ymm16, %xmm19 {k1} +vcvttpd2udqy (%rax), %xmm19 {k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2udqy %ymm16, %xmm19 {z}{k1} +vcvttpd2udqy (%rax), %xmm19 {z}{k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2udqx 
%xmm16, %xmm19 +vcvttpd2udqx (%rax), %xmm19 +vcvttpd2udqx (%rax){1to2}, %xmm19 +vcvttpd2udqx %xmm16, %xmm19 {k1} +vcvttpd2udqx (%rax), %xmm19 {k1} +vcvttpd2udqx (%rax){1to2},%xmm19 {k1} +vcvttpd2udqx %xmm16, %xmm19 {z}{k1} +vcvttpd2udqx (%rax), %xmm19 {z}{k1} +vcvttpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2udq %xmm16, %xmm19 +vcvttps2udq (%rax), %xmm19 +vcvttps2udq (%rax){1to4}, %xmm19 +vcvttps2udq %xmm16, %xmm19 {k1} +vcvttps2udq (%rax), %xmm19 {k1} +vcvttps2udq (%rax){1to4},%xmm19 {k1} +vcvttps2udq %xmm16, %xmm19 {z}{k1} +vcvttps2udq (%rax), %xmm19 {z}{k1} +vcvttps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2udq %ymm16, %ymm19 +vcvttps2udq (%rax), %ymm19 +vcvttps2udq (%rax){1to8}, %ymm19 +vcvttps2udq %ymm16,%ymm19 {k1} +vcvttps2udq (%rax),%ymm19 {k1} +vcvttps2udq (%rax){1to8}, %ymm19 {k1} +vcvttps2udq %ymm16, %ymm19 {z}{k1} +vcvttps2udq (%rax), %ymm19 {z}{k1} +vcvttps2udq (%rax){1to8}, %ymm19 {z}{k1} + vdivpd %xmm16, %xmm17, %xmm19 vdivpd (%rax), %xmm17, %xmm19 vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -1545,6 +1745,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, 
%xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), 
%xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 
{%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} 
{z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy 
(%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 14 4.00 vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd 
(%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -2622,7 +3002,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 423.00 289.33 201.33 407.17 407.17 32.00 747.33 4.00 10.67 +# CHECK-NEXT: - 423.00 364.33 276.33 467.17 467.17 32.00 825.33 4.00 10.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2782,6 +3162,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} 
+# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %ymm16, %xmm19 
{%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm19 +# 
CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to4}, 
%ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm16, %xmm19 
{%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm19 
{%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - 
vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 4.00 1.00 - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax){1to2}, %xmm17, %xmm19 From 45dead4b8fe8d6981a5613b0204841da191a2e03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Mon, 31 Oct 2022 15:28:32 +0100 Subject: [PATCH 330/516] [flang] Add one missing "llvm_" prefix to gtest_main The `target_link_libraries()` call operates on `gtest_main` rather than `llvm_gtest_main`. I think it was missed while prefixing all the tools in 38151a08c21e0cdacd52af03e4bdff258f0bac6f. 
Differential Revision: https://reviews.llvm.org/D137080 --- flang/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index ca9124c05f4fe..943c34c1ad4fe 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -170,7 +170,7 @@ if (FLANG_STANDALONE_BUILD) find_package(Threads) target_link_libraries(llvm_gtest PUBLIC Threads::Threads) add_library(llvm_gtest_main ${UNITTEST_DIR}/UnitTestMain/TestMain.cpp) - target_link_libraries(gtest_main PUBLIC llvm_gtest) + target_link_libraries(llvm_gtest_main PUBLIC llvm_gtest) endif() set(FLANG_GTEST_AVAIL 1) else() From edf885531e9e38fb127f6075373b706acef7b59c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2022 19:06:55 +0000 Subject: [PATCH 331/516] [X86] Replace unnecessary int2float and float2double overrides with better base class defs Broadwell/Haswell were completely overriding the class defs - we can remove those overrides entirely by just choosing better class defs (plus a fix for missing mmx folded load). 
--- llvm/lib/Target/X86/X86SchedBroadwell.td | 39 +++----------- llvm/lib/Target/X86/X86SchedHaswell.td | 51 +++++-------------- .../llvm-mca/X86/Haswell/resources-sse1.s | 4 +- 3 files changed, 21 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 003a4e376adb3..d4ffdea79c5cf 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -367,17 +367,17 @@ defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; defm : BWWriteResPair; defm : X86WriteResPairUnsupported; -defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; defm : BWWriteResPair; @@ -715,14 +715,6 @@ def: InstRW<[BWWriteResGroup14], (instrs LFENCE, WAIT, XGETBV)>; -def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup15], (instregex "(V?)CVTPS2PDrr", - "(V?)CVTSS2SDrr")>; - def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -784,9 +776,7 @@ def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSrr)>; -def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr", - "(V?)CVTDQ2PS(Y?)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr")>; def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> { let Latency = 3; @@ -858,13 +848,6 @@ def: InstRW<[BWWriteResGroup39], (instregex "(V?)CVT(T?)SD2SI64rr", "(V?)CVT(T?)SS2SI64rr", "(V?)CVT(T?)SS2SIrr")>; -def 
BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup40], (instrs VCVTPS2PDYrr)>; - def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> { let Latency = 4; let NumMicroOps = 2; @@ -1164,9 +1147,6 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSrm, - CVTDQ2PSrm, - VCVTDQ2PSrm)>; def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>; def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> { @@ -1230,13 +1210,6 @@ def: InstRW<[BWWriteResGroup105], (instregex "(V?)CVTSS2SI(64)?rm", "VCVTTSS2SI64rm", "(V?)CVTTSS2SIrm")>; -def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup106], (instrs VCVTPS2PDYrm)>; - def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 6cff9c30ee161..13b0ed25361e9 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -360,23 +360,23 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 - -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : 
HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 + +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -1131,14 +1131,6 @@ def: InstRW<[HWWriteResGroup30], (instrs LFENCE, WAIT, XGETBV)>; -def HWWriteResGroup31 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup31], (instregex "(V?)CVTPS2PDrr", - "(V?)CVTSS2SDrr")>; - def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -1241,9 +1233,7 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSrr)>; -def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr", - "(V?)CVTDQ2PS(Y?)rr")>; +def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr")>; def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> { let Latency = 3; @@ -1267,8 +1257,7 @@ def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m")>; -def: InstRW<[HWWriteResGroup52_1], (instrs VCVTDQ2PSYrm, - VCVTPS2DQYrm, +def: InstRW<[HWWriteResGroup52_1], (instrs VCVTPS2DQYrm, VCVTTPS2DQYrm)>; def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> { @@ -1369,13 +1358,6 @@ def HWWriteResGroup70 : SchedWriteRes<[HWPort0,HWPort1]> { def: InstRW<[HWWriteResGroup70], (instregex "(V?)CVT(T?)SD2SI(64)?rr", "(V?)CVT(T?)SS2SI(64)?rr")>; -def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup71], (instrs VCVTPS2PDYrr)>; - def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> { let Latency = 4; let NumMicroOps = 2; @@ -1414,13 +1396,6 @@ def: InstRW<[HWWriteResGroup76], 
(instregex "(V?)CVTSD2SI(64)?rm", "VCVTTSS2SI64rm", "(V?)CVTTSS2SIrm")>; -def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup77], (instrs VCVTPS2PDYrm)>; - def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let Latency = 10; let NumMicroOps = 3; diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s index 17203584ea3fe..a79a47724f603 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s @@ -211,7 +211,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtps2pi %xmm0, %mm2 -# CHECK-NEXT: 2 8 1.00 * cvtps2pi (%rax), %mm2 +# CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2 # CHECK-NEXT: 2 4 1.00 cvtsi2ss %ecx, %xmm2 # CHECK-NEXT: 3 5 2.00 cvtsi2ss %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 @@ -221,7 +221,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx # CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 cvttps2pi %xmm0, %mm2 -# CHECK-NEXT: 2 8 1.00 * cvttps2pi (%rax), %mm2 +# CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx From 89b356f05ab7aa3d96fc7b68aece6e7a5bdb0db5 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 2 Nov 2022 20:27:42 +0100 Subject: [PATCH 332/516] [libc++] Granularize includes Reviewed By: ldionne, #libc Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D137283 --- libcxx/include/__format/buffer.h | 2 +- libcxx/include/__format/format_context.h | 1 - .../include/__format/parser_std_format_spec.h | 2 +- .../include/__functional/ranges_operations.h | 3 +- 
libcxx/include/__iterator/advance.h | 3 +- libcxx/include/__iterator/common_iterator.h | 8 +++- libcxx/include/__iterator/concepts.h | 16 +++++++- libcxx/include/__iterator/counted_iterator.h | 6 ++- .../include/__iterator/incrementable_traits.h | 2 +- libcxx/include/__iterator/iter_swap.h | 3 +- libcxx/include/__iterator/iterator_traits.h | 8 +++- libcxx/include/__iterator/readable_traits.h | 2 +- libcxx/include/__memory/concepts.h | 2 +- libcxx/include/__ranges/common_view.h | 3 +- libcxx/include/__ranges/copyable_box.h | 4 +- libcxx/include/__ranges/drop_view.h | 3 +- libcxx/include/__ranges/enable_view.h | 3 +- libcxx/include/__ranges/filter_view.h | 5 ++- .../include/__ranges/non_propagating_cache.h | 1 - libcxx/include/__ranges/range_adaptor.h | 5 ++- libcxx/include/__ranges/ref_view.h | 3 +- libcxx/include/__ranges/single_view.h | 2 +- libcxx/include/__ranges/size.h | 2 +- libcxx/include/__ranges/take_view.h | 3 +- libcxx/include/algorithm | 1 + libcxx/include/any | 1 + libcxx/include/array | 1 + libcxx/include/bitset | 4 ++ libcxx/include/charconv | 1 + libcxx/include/chrono | 4 ++ libcxx/include/codecvt | 4 ++ libcxx/include/condition_variable | 4 ++ libcxx/include/deque | 1 + libcxx/include/ext/hash_map | 1 + libcxx/include/ext/hash_set | 1 + libcxx/include/filesystem | 4 ++ libcxx/include/forward_list | 1 + libcxx/include/fstream | 4 ++ libcxx/include/functional | 2 +- libcxx/include/ios | 4 ++ libcxx/include/istream | 4 ++ libcxx/include/list | 1 + libcxx/include/locale | 1 + libcxx/include/map | 1 + libcxx/include/memory | 1 + libcxx/include/mutex | 1 + libcxx/include/numbers | 6 ++- libcxx/include/numeric | 1 + libcxx/include/queue | 1 + libcxx/include/random | 1 + libcxx/include/regex | 1 + libcxx/include/set | 1 + libcxx/include/span | 1 + libcxx/include/stack | 1 + libcxx/include/string | 1 + libcxx/include/string_view | 1 + libcxx/include/unordered_map | 1 + libcxx/include/unordered_set | 1 + libcxx/include/valarray | 1 + libcxx/include/vector | 1 
+ .../test/libcxx/transitive_includes/cxx03.csv | 2 - .../test/libcxx/transitive_includes/cxx11.csv | 2 - .../test/libcxx/transitive_includes/cxx14.csv | 2 - .../test/libcxx/transitive_includes/cxx17.csv | 2 - .../test/libcxx/transitive_includes/cxx20.csv | 2 - .../test/libcxx/transitive_includes/cxx2b.csv | 38 ------------------- .../alg.clamp/ranges.clamp.pass.cpp | 1 + 67 files changed, 133 insertions(+), 74 deletions(-) diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h index 4f7577eb06055..60c1f8093c61d 100644 --- a/libcxx/include/__format/buffer.h +++ b/libcxx/include/__format/buffer.h @@ -17,6 +17,7 @@ #include <__algorithm/ranges_copy_n.h> #include <__algorithm/transform.h> #include <__algorithm/unwrap_iter.h> +#include <__concepts/same_as.h> #include <__config> #include <__format/concepts.h> #include <__format/enable_insertable.h> @@ -27,7 +28,6 @@ #include <__iterator/iterator_traits.h> #include <__iterator/wrap_iter.h> #include <__utility/move.h> -#include #include #include #include diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index 19468de45ca37..b3c0b34427852 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -18,7 +18,6 @@ #include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include <__utility/move.h> -#include #include #ifndef _LIBCPP_HAS_NO_LOCALIZATION diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h index bd5b6ae2a4e54..05f51f7cf9b94 100644 --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -19,6 +19,7 @@ #include <__algorithm/find_if.h> #include <__algorithm/min.h> #include <__assert> +#include <__concepts/same_as.h> #include <__config> #include <__debug> #include <__format/format_arg.h> @@ -28,7 +29,6 @@ #include <__format/unicode.h> #include 
<__variant/monostate.h> #include -#include #include #include #include diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index 3f63a86731e0e..87081dd56a05c 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -10,9 +10,10 @@ #ifndef _LIBCPP___FUNCTIONAL_RANGES_OPERATIONS_H #define _LIBCPP___FUNCTIONAL_RANGES_OPERATIONS_H +#include <__concepts/equality_comparable.h> +#include <__concepts/totally_ordered.h> #include <__config> #include <__utility/forward.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h index e26ad4b7317cf..cd6353e65e86a 100644 --- a/libcxx/include/__iterator/advance.h +++ b/libcxx/include/__iterator/advance.h @@ -11,6 +11,8 @@ #define _LIBCPP___ITERATOR_ADVANCE_H #include <__assert> +#include <__concepts/assignable.h> +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> @@ -18,7 +20,6 @@ #include <__utility/convert_to_integral.h> #include <__utility/move.h> #include <__utility/unreachable.h> -#include #include #include #include diff --git a/libcxx/include/__iterator/common_iterator.h b/libcxx/include/__iterator/common_iterator.h index 4de57c5d67c08..a1985c9287534 100644 --- a/libcxx/include/__iterator/common_iterator.h +++ b/libcxx/include/__iterator/common_iterator.h @@ -11,6 +11,13 @@ #define _LIBCPP___ITERATOR_COMMON_ITERATOR_H #include <__assert> +#include <__concepts/assignable.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/copyable.h> +#include <__concepts/derived_from.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> @@ -18,7 +25,6 @@ 
#include <__iterator/iter_swap.h> #include <__iterator/iterator_traits.h> #include <__iterator/readable_traits.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h index bd68889333ce6..246f84c7cf53d 100644 --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -10,6 +10,21 @@ #ifndef _LIBCPP___ITERATOR_CONCEPTS_H #define _LIBCPP___ITERATOR_CONCEPTS_H +#include <__concepts/arithmetic.h> +#include <__concepts/assignable.h> +#include <__concepts/common_reference_with.h> +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> +#include <__concepts/derived_from.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/invocable.h> +#include <__concepts/movable.h> +#include <__concepts/predicate.h> +#include <__concepts/regular.h> +#include <__concepts/relation.h> +#include <__concepts/same_as.h> +#include <__concepts/semiregular.h> +#include <__concepts/totally_ordered.h> #include <__config> #include <__iterator/incrementable_traits.h> #include <__iterator/iter_move.h> @@ -17,7 +32,6 @@ #include <__iterator/readable_traits.h> #include <__memory/pointer_traits.h> #include <__utility/forward.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/counted_iterator.h b/libcxx/include/__iterator/counted_iterator.h index aab2c51f33a84..8303013ef4e12 100644 --- a/libcxx/include/__iterator/counted_iterator.h +++ b/libcxx/include/__iterator/counted_iterator.h @@ -10,6 +10,11 @@ #define _LIBCPP___ITERATOR_COUNTED_ITERATOR_H #include <__assert> +#include <__concepts/assignable.h> +#include <__concepts/common_with.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/default_sentinel.h> @@ -21,7 +26,6 @@ #include 
<__memory/pointer_traits.h> #include <__utility/move.h> #include -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/incrementable_traits.h b/libcxx/include/__iterator/incrementable_traits.h index e6a5ed720f9fa..6f966ec4c7484 100644 --- a/libcxx/include/__iterator/incrementable_traits.h +++ b/libcxx/include/__iterator/incrementable_traits.h @@ -10,9 +10,9 @@ #ifndef _LIBCPP___ITERATOR_INCREMENTABLE_TRAITS_H #define _LIBCPP___ITERATOR_INCREMENTABLE_TRAITS_H +#include <__concepts/arithmetic.h> #include <__config> #include <__type_traits/is_primary_template.h> -#include #include #include diff --git a/libcxx/include/__iterator/iter_swap.h b/libcxx/include/__iterator/iter_swap.h index 9e06464c36904..40272e2b0ad5f 100644 --- a/libcxx/include/__iterator/iter_swap.h +++ b/libcxx/include/__iterator/iter_swap.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___ITERATOR_ITER_SWAP_H #define _LIBCPP___ITERATOR_ITER_SWAP_H +#include <__concepts/class_or_enum.h> +#include <__concepts/swappable.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/iter_move.h> @@ -16,7 +18,6 @@ #include <__iterator/readable_traits.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/iterator_traits.h b/libcxx/include/__iterator/iterator_traits.h index 918c7138ec187..b4cf07233296d 100644 --- a/libcxx/include/__iterator/iterator_traits.h +++ b/libcxx/include/__iterator/iterator_traits.h @@ -10,11 +10,17 @@ #ifndef _LIBCPP___ITERATOR_ITERATOR_TRAITS_H #define _LIBCPP___ITERATOR_ITERATOR_TRAITS_H +#include <__concepts/arithmetic.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/copyable.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/same_as.h> +#include <__concepts/totally_ordered.h> #include <__config> #include <__fwd/pair.h> #include 
<__iterator/incrementable_traits.h> #include <__iterator/readable_traits.h> -#include #include #include diff --git a/libcxx/include/__iterator/readable_traits.h b/libcxx/include/__iterator/readable_traits.h index 500b46ac145f6..dc818d8a230e7 100644 --- a/libcxx/include/__iterator/readable_traits.h +++ b/libcxx/include/__iterator/readable_traits.h @@ -10,8 +10,8 @@ #ifndef _LIBCPP___ITERATOR_READABLE_TRAITS_H #define _LIBCPP___ITERATOR_READABLE_TRAITS_H +#include <__concepts/same_as.h> #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__memory/concepts.h b/libcxx/include/__memory/concepts.h index bc4cff7719e4e..76d2a2e729d61 100644 --- a/libcxx/include/__memory/concepts.h +++ b/libcxx/include/__memory/concepts.h @@ -10,13 +10,13 @@ #ifndef _LIBCPP___MEMORY_CONCEPTS_H #define _LIBCPP___MEMORY_CONCEPTS_H +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__iterator/readable_traits.h> #include <__ranges/access.h> #include <__ranges/concepts.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/common_view.h b/libcxx/include/__ranges/common_view.h index ec0c7632d9297..aad0d2f49e78d 100644 --- a/libcxx/include/__ranges/common_view.h +++ b/libcxx/include/__ranges/common_view.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___RANGES_COMMON_VIEW_H #define _LIBCPP___RANGES_COMMON_VIEW_H +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> #include <__config> #include <__iterator/common_iterator.h> #include <__iterator/iterator_traits.h> @@ -21,7 +23,6 @@ #include <__ranges/view_interface.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/copyable_box.h b/libcxx/include/__ranges/copyable_box.h index 9b708d942c20f..fb3d6e409c8f2 100644 --- 
a/libcxx/include/__ranges/copyable_box.h +++ b/libcxx/include/__ranges/copyable_box.h @@ -10,11 +10,13 @@ #ifndef _LIBCPP___RANGES_COPYABLE_BOX_H #define _LIBCPP___RANGES_COPYABLE_BOX_H +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> +#include <__concepts/movable.h> #include <__config> #include <__memory/addressof.h> #include <__memory/construct_at.h> #include <__utility/move.h> -#include #include #include diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h index 9e0fe16a55a40..3131f4b43d41d 100644 --- a/libcxx/include/__ranges/drop_view.h +++ b/libcxx/include/__ranges/drop_view.h @@ -11,6 +11,8 @@ #include <__algorithm/min.h> #include <__assert> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> #include <__config> #include <__functional/bind_back.h> #include <__fwd/span.h> @@ -33,7 +35,6 @@ #include <__utility/auto_cast.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/enable_view.h b/libcxx/include/__ranges/enable_view.h index a1e5721404cdd..2dc4752ff428f 100644 --- a/libcxx/include/__ranges/enable_view.h +++ b/libcxx/include/__ranges/enable_view.h @@ -10,8 +10,9 @@ #ifndef _LIBCPP___RANGES_ENABLE_VIEW_H #define _LIBCPP___RANGES_ENABLE_VIEW_H +#include <__concepts/derived_from.h> +#include <__concepts/same_as.h> #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index 3e50c17c64050..74c07d9d36691 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -10,6 +10,10 @@ #define _LIBCPP___RANGES_FILTER_VIEW_H #include <__algorithm/ranges_find_if.h> +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> +#include <__concepts/derived_from.h> +#include 
<__concepts/equality_comparable.h> #include <__config> #include <__debug> #include <__functional/bind_back.h> @@ -30,7 +34,6 @@ #include <__utility/forward.h> #include <__utility/in_place.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/non_propagating_cache.h b/libcxx/include/__ranges/non_propagating_cache.h index b565af69e609d..76ca73dd03742 100644 --- a/libcxx/include/__ranges/non_propagating_cache.h +++ b/libcxx/include/__ranges/non_propagating_cache.h @@ -14,7 +14,6 @@ #include <__iterator/iterator_traits.h> // iter_reference_t #include <__memory/addressof.h> #include <__utility/forward.h> -#include // constructible_from #include #include diff --git a/libcxx/include/__ranges/range_adaptor.h b/libcxx/include/__ranges/range_adaptor.h index c287a193a57db..37e48179e378b 100644 --- a/libcxx/include/__ranges/range_adaptor.h +++ b/libcxx/include/__ranges/range_adaptor.h @@ -10,13 +10,16 @@ #ifndef _LIBCPP___RANGES_RANGE_ADAPTOR_H #define _LIBCPP___RANGES_RANGE_ADAPTOR_H +#include <__concepts/constructible.h> +#include <__concepts/derived_from.h> +#include <__concepts/invocable.h> +#include <__concepts/same_as.h> #include <__config> #include <__functional/compose.h> #include <__functional/invoke.h> #include <__ranges/concepts.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/ref_view.h b/libcxx/include/__ranges/ref_view.h index 7d1ae74ca2a50..bf94889f62bf7 100644 --- a/libcxx/include/__ranges/ref_view.h +++ b/libcxx/include/__ranges/ref_view.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___RANGES_REF_VIEW_H #define _LIBCPP___RANGES_REF_VIEW_H +#include <__concepts/convertible_to.h> +#include <__concepts/different_from.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> @@ -22,7 +24,6 @@ #include <__ranges/size.h> #include 
<__ranges/view_interface.h> #include <__utility/forward.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/single_view.h b/libcxx/include/__ranges/single_view.h index 38726c225e57b..e15a0c3199597 100644 --- a/libcxx/include/__ranges/single_view.h +++ b/libcxx/include/__ranges/single_view.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___RANGES_SINGLE_VIEW_H #define _LIBCPP___RANGES_SINGLE_VIEW_H +#include <__concepts/constructible.h> #include <__config> #include <__ranges/copyable_box.h> #include <__ranges/range_adaptor.h> @@ -16,7 +17,6 @@ #include <__utility/forward.h> #include <__utility/in_place.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index 0e6d295651a3c..e4c82222f56ee 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -9,13 +9,13 @@ #ifndef _LIBCPP___RANGES_SIZE_H #define _LIBCPP___RANGES_SIZE_H +#include <__concepts/arithmetic.h> #include <__concepts/class_or_enum.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__ranges/access.h> #include <__utility/auto_cast.h> -#include #include #include diff --git a/libcxx/include/__ranges/take_view.h b/libcxx/include/__ranges/take_view.h index 3fb9499c0118a..2c98a0bed690b 100644 --- a/libcxx/include/__ranges/take_view.h +++ b/libcxx/include/__ranges/take_view.h @@ -11,6 +11,8 @@ #include <__algorithm/min.h> #include <__algorithm/ranges_min.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> #include <__config> #include <__functional/bind_back.h> #include <__fwd/span.h> @@ -34,7 +36,6 @@ #include <__utility/auto_cast.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 
9b21e7bbf1936..e27872285f15e 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -1917,6 +1917,7 @@ template #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include # include diff --git a/libcxx/include/any b/libcxx/include/any index dc26a20e6ae22..763e3a1815d08 100644 --- a/libcxx/include/any +++ b/libcxx/include/any @@ -699,6 +699,7 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include # include diff --git a/libcxx/include/array b/libcxx/include/array index af199ca2e7199..cb1a6d1202582 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -534,6 +534,7 @@ _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/include/bitset b/libcxx/include/bitset index 72b678b5ffdda..c260f9f592c01 100644 --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -1152,4 +1152,8 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_BITSET diff --git a/libcxx/include/charconv b/libcxx/include/charconv index b8664a456b6b9..d2031eac8ba9b 100644 --- a/libcxx/include/charconv +++ b/libcxx/include/charconv @@ -832,6 +832,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/chrono b/libcxx/include/chrono index f34919040950b..05e4b6d126ebf 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -728,4 +728,8 @@ constexpr chrono::year operator ""y(unsigned lo # pragma GCC system_header #endif +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_CHRONO diff --git a/libcxx/include/codecvt 
b/libcxx/include/codecvt index a724477b15548..c6445b115cb73 100644 --- a/libcxx/include/codecvt +++ b/libcxx/include/codecvt @@ -553,4 +553,8 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_END_NAMESPACE_STD +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_CODECVT diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index 038f0ba2257d7..6b4d506939e16 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -268,4 +268,8 @@ _LIBCPP_END_NAMESPACE_STD #endif // !_LIBCPP_HAS_NO_THREADS +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_CONDITION_VARIABLE diff --git a/libcxx/include/deque b/libcxx/include/deque index b6e092b396370..45e1a68d3a13f 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -2943,6 +2943,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 48186aed0b6a6..3ad4b166865af 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -983,6 +983,7 @@ operator!=(const hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __x, } // namespace __gnu_cxx #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 68b449d6b867a..dc8b786453842 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -663,6 +663,7 @@ operator!=(const hash_multiset<_Value, _Hash, _Pred, _Alloc>& __x, } // namespace __gnu_cxx #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index 1e7efd248aa18..98d4bb9261bcc 100644 --- a/libcxx/include/filesystem +++ 
b/libcxx/include/filesystem @@ -461,4 +461,8 @@ inline constexpr bool std::ranges::enable_view +#endif + #endif // _LIBCPP_FILESYSTEM diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 1f817ae8927fd..4ef6f88ef2f4c 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -1792,6 +1792,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 95f345fae4d78..1d2cb79b26b24 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -1744,4 +1744,8 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_FSTREAM diff --git a/libcxx/include/functional b/libcxx/include/functional index 4c4e02dd702bc..8589d3a9d6a9c 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -531,7 +531,6 @@ POLICY: For non-variadic implementations, the number of arguments is limited #include <__functional/unary_negate.h> #include <__functional/unwrap_ref.h> #include <__utility/forward.h> -#include #include #include // TODO: find out why removing this breaks the modules build #include @@ -543,6 +542,7 @@ POLICY: For non-variadic implementations, the number of arguments is limited #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/ios b/libcxx/include/ios index 6e8360f4ab565..ac4637bce1d57 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -1038,4 +1038,8 @@ defaultfloat(ios_base& __str) _LIBCPP_END_NAMESPACE_STD +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_IOS diff --git a/libcxx/include/istream b/libcxx/include/istream index 403b29c29e9e3..1c9adcc0c6297 100644 --- a/libcxx/include/istream +++ 
b/libcxx/include/istream @@ -1637,6 +1637,10 @@ extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_iostream; _LIBCPP_END_NAMESPACE_STD +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + _LIBCPP_POP_MACROS #endif // _LIBCPP_ISTREAM diff --git a/libcxx/include/list b/libcxx/include/list index d3c47bb1d6155..de2693e79aff5 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -2375,6 +2375,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/include/locale b/libcxx/include/locale index c9ec7c36f5820..fd31f0d87672f 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -4353,6 +4353,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/map b/libcxx/include/map index 2d55b69dc9267..9cf47b245a02a 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -2352,6 +2352,7 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include # include diff --git a/libcxx/include/memory b/libcxx/include/memory index 8694cf6994a7c..48e808ef54cb1 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -900,6 +900,7 @@ template #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include # include diff --git a/libcxx/include/mutex b/libcxx/include/mutex index d0b53ba75d834..d11ffb20eff93 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -704,6 +704,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/numbers b/libcxx/include/numbers index 3c8527dfc2de1..1d9b6b0e5fb35 100644 --- 
a/libcxx/include/numbers +++ b/libcxx/include/numbers @@ -59,8 +59,8 @@ namespace std::numbers { */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__concepts/arithmetic.h> #include <__config> -#include #include #include @@ -131,4 +131,8 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER > 17 +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_NUMBERS diff --git a/libcxx/include/numeric b/libcxx/include/numeric index 64cd45b430828..2fb6f9ed01929 100644 --- a/libcxx/include/numeric +++ b/libcxx/include/numeric @@ -172,6 +172,7 @@ template #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/queue b/libcxx/include/queue index d23f2385eda34..c58da5ec6ee3c 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -959,6 +959,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator # include #endif diff --git a/libcxx/include/random b/libcxx/include/random index ecab84a573550..f8077f10c17f3 100644 --- a/libcxx/include/random +++ b/libcxx/include/random @@ -1730,6 +1730,7 @@ class piecewise_linear_distribution #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include # include # include # include diff --git a/libcxx/include/regex b/libcxx/include/regex index 91a10feef6b5d..8118c944cae05 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -6858,6 +6858,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/set b/libcxx/include/set index 3933ef22c221e..1154c4e9509c7 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -1597,6 +1597,7 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff 
--git a/libcxx/include/span b/libcxx/include/span index 337a67434ec4c..8afd1942c1f31 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -590,6 +590,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/stack b/libcxx/include/stack index 7b08d57da152b..2abbcd025c4aa 100644 --- a/libcxx/include/stack +++ b/libcxx/include/stack @@ -362,6 +362,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator, _Alloc> _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/string b/libcxx/include/string index 592c63466be74..6aee5a489a754 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -4613,6 +4613,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include # include diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 2c4f306458c48..8f39cdcc23f62 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -1023,6 +1023,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index a727c36223e5e..cd0aea1205d52 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -2646,6 +2646,7 @@ _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include # include #endif diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 4786a8a8c5f18..9a25510139428 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -1816,6 +1816,7 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER 
<= 20 +# include # include # include #endif diff --git a/libcxx/include/valarray b/libcxx/include/valarray index f28e471dfb3f9..6c33d0531cb88 100644 --- a/libcxx/include/valarray +++ b/libcxx/include/valarray @@ -4931,6 +4931,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include #endif diff --git a/libcxx/include/vector b/libcxx/include/vector index b5a97c66cf53a..8f9b8fb3d727b 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -3281,6 +3281,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index a7250cf841dba..6e5ce44423c71 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -335,7 +335,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -650,7 +649,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 97dff0bf62d4d..837e22b85df79 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -335,7 +335,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -651,7 +650,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index e0935e85f1b7f..e1cbeeee647f0 100644 --- 
a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -337,7 +337,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -653,7 +652,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index e0935e85f1b7f..e1cbeeee647f0 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -337,7 +337,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -653,7 +652,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 4589f7b98b2be..ead6b43f1bada 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -349,7 +349,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -663,7 +662,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv index e4e82d3cf4319..d896ed6cc26b8 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx2b.csv @@ -1,6 +1,5 @@ algorithm bit algorithm climits -algorithm concepts algorithm cstddef algorithm cstdint algorithm cstdlib @@ -12,7 +11,6 @@ algorithm new algorithm type_traits algorithm version any atomic -any 
concepts any cstddef any cstdint any cstdlib @@ -26,7 +24,6 @@ any type_traits any typeinfo any version array compare -array concepts array cstddef array cstdlib array initializer_list @@ -59,7 +56,6 @@ bit limits bit type_traits bit version bitset climits -bitset concepts bitset cstddef bitset cstdint bitset cstdlib @@ -75,7 +71,6 @@ bitset version ccomplex complex charconv cerrno charconv cmath -charconv concepts charconv cstddef charconv cstdint charconv cstdlib @@ -88,7 +83,6 @@ chrono bit chrono charconv chrono cmath chrono compare -chrono concepts chrono cstddef chrono cstdint chrono cstdlib @@ -111,7 +105,6 @@ cmath type_traits cmath version codecvt atomic codecvt cctype -codecvt concepts codecvt cstddef codecvt cstdint codecvt cstdlib @@ -142,7 +135,6 @@ concepts cstddef concepts type_traits concepts version condition_variable atomic -condition_variable concepts condition_variable cstddef condition_variable cstdint condition_variable cstdlib @@ -171,7 +163,6 @@ cwchar cwctype cwctype cctype deque atomic deque compare -deque concepts deque cstddef deque cstdint deque cstdlib @@ -248,7 +239,6 @@ experimental/vector experimental/memory_resource experimental/vector vector ext/hash_map algorithm ext/hash_map cmath -ext/hash_map concepts ext/hash_map cstddef ext/hash_map cstdint ext/hash_map cstring @@ -261,7 +251,6 @@ ext/hash_map string ext/hash_map type_traits ext/hash_set algorithm ext/hash_set cmath -ext/hash_set concepts ext/hash_set cstddef ext/hash_set cstdint ext/hash_set cstring @@ -273,7 +262,6 @@ ext/hash_set string ext/hash_set type_traits filesystem cerrno filesystem compare -filesystem concepts filesystem cstddef filesystem cstdint filesystem cstdlib @@ -292,7 +280,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -308,7 +295,6 @@ format type_traits format version forward_list atomic forward_list compare -forward_list concepts forward_list cstddef forward_list cstdint 
forward_list cstdlib @@ -324,7 +310,6 @@ forward_list typeinfo forward_list version fstream atomic fstream cctype -fstream concepts fstream cstddef fstream cstdint fstream cstdio @@ -345,7 +330,6 @@ fstream typeinfo fstream version functional array functional atomic -functional concepts functional cstddef functional cstdint functional cstdlib @@ -378,7 +362,6 @@ iomanip istream iomanip version ios atomic ios cctype -ios concepts ios cstddef ios cstdint ios cstdlib @@ -400,7 +383,6 @@ iostream istream iostream ostream iostream streambuf iostream version -istream concepts istream cstddef istream iosfwd istream ostream @@ -423,7 +405,6 @@ limits type_traits limits version list atomic list compare -list concepts list cstddef list cstdint list cstdlib @@ -439,7 +420,6 @@ list typeinfo list version locale atomic locale cctype -locale concepts locale cstddef locale cstdint locale cstdio @@ -459,7 +439,6 @@ locale type_traits locale typeinfo locale version map compare -map concepts map cstddef map cstdlib map initializer_list @@ -472,7 +451,6 @@ map type_traits map version memory atomic memory compare -memory concepts memory cstddef memory cstdint memory cstdlib @@ -495,7 +473,6 @@ memory_resource stdexcept memory_resource tuple memory_resource version mutex atomic -mutex concepts mutex cstddef mutex cstdint mutex cstdlib @@ -516,11 +493,9 @@ new cstdlib new exception new type_traits new version -numbers concepts numbers type_traits numbers version numeric cmath -numeric concepts numeric cstddef numeric limits numeric type_traits @@ -547,7 +522,6 @@ ostream streambuf ostream type_traits ostream version queue compare -queue concepts queue cstddef queue cstdlib queue deque @@ -559,7 +533,6 @@ queue version random bit random climits random cmath -random concepts random cstddef random cstdint random cstdlib @@ -572,7 +545,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list @@ -593,7 +565,6 
@@ ratio version regex atomic regex cctype regex compare -regex concepts regex cstddef regex cstdint regex cstdlib @@ -624,7 +595,6 @@ semaphore ratio semaphore type_traits semaphore version set compare -set concepts set cstddef set cstdlib set initializer_list @@ -643,7 +613,6 @@ shared_mutex system_error shared_mutex type_traits shared_mutex version span array -span concepts span cstddef span initializer_list span limits @@ -656,7 +625,6 @@ sstream string sstream type_traits sstream version stack compare -stack concepts stack cstddef stack deque stack initializer_list @@ -671,7 +639,6 @@ streambuf iosfwd streambuf version string climits string compare -string concepts string cstddef string cstdint string cstdio @@ -688,7 +655,6 @@ string tuple string type_traits string version string_view compare -string_view concepts string_view cstddef string_view cstdint string_view cstdio @@ -744,7 +710,6 @@ typeinfo exception typeinfo type_traits unordered_map cmath unordered_map compare -unordered_map concepts unordered_map cstddef unordered_map cstdint unordered_map cstdlib @@ -759,7 +724,6 @@ unordered_map type_traits unordered_map version unordered_set cmath unordered_set compare -unordered_set concepts unordered_set cstddef unordered_set cstdint unordered_set cstdlib @@ -780,7 +744,6 @@ utility limits utility type_traits utility version valarray cmath -valarray concepts valarray cstddef valarray cstdlib valarray cstring @@ -804,7 +767,6 @@ variant version vector atomic vector climits vector compare -vector concepts vector cstddef vector cstdint vector cstdlib diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp index 553ffb9d4b487..9da52e2772a9e 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include 
From 59ef4b3686e4704f253cf37011c7f0362095cdfe Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 22 Sep 2022 21:53:13 +0200 Subject: [PATCH 333/516] [libc++] Split __allocator_destructor out of shared_ptr.h Reviewed By: ldionne, huixie90, #libc Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D134479 --- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__functional/function.h | 2 +- libcxx/include/__locale | 6 ++- .../include/__memory/allocator_destructor.h | 42 +++++++++++++++++ libcxx/include/__memory/shared_ptr.h | 21 +-------- libcxx/include/__split_buffer | 1 - libcxx/include/any | 4 +- libcxx/include/codecvt | 11 +++++ libcxx/include/deque | 4 ++ libcxx/include/forward_list | 5 +- libcxx/include/fstream | 5 ++ libcxx/include/future | 1 + libcxx/include/ios | 9 ++++ libcxx/include/list | 5 +- libcxx/include/locale | 3 ++ libcxx/include/module.modulemap.in | 1 + libcxx/include/ostream | 2 + libcxx/include/regex | 5 ++ libcxx/include/vector | 1 + libcxx/test/libcxx/private_headers.verify.cpp | 1 + .../test/libcxx/transitive_includes/cxx03.csv | 8 ++++ .../test/libcxx/transitive_includes/cxx11.csv | 8 ++++ .../test/libcxx/transitive_includes/cxx14.csv | 8 ++++ .../test/libcxx/transitive_includes/cxx17.csv | 8 ++++ .../test/libcxx/transitive_includes/cxx20.csv | 7 +++ .../test/libcxx/transitive_includes/cxx2b.csv | 47 +++---------------- 26 files changed, 148 insertions(+), 68 deletions(-) create mode 100644 libcxx/include/__memory/allocator_destructor.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 7603b37c9d5c0..4310cfdc50308 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -396,6 +396,7 @@ set(files __memory/allocation_guard.h __memory/allocator.h __memory/allocator_arg_t.h + __memory/allocator_destructor.h __memory/allocator_traits.h __memory/assume_aligned.h __memory/auto_ptr.h diff --git a/libcxx/include/__functional/function.h 
b/libcxx/include/__functional/function.h index 44e33a5a3a624..2d9cdc0459d63 100644 --- a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -18,10 +18,10 @@ #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/builtin_new_allocator.h> #include <__memory/compressed_pair.h> -#include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> #include <__utility/forward.h> #include <__utility/move.h> diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 65160f3562fbc..242de1ad1a716 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -12,16 +12,18 @@ #include <__availability> #include <__config> -#include <__memory/shared_ptr.h> #include #include #include #include #include +// Some platforms require more includes than others. Keep the includes on all plaforms for now. +#include +#include + #if defined(_LIBCPP_MSVCRT_LIKE) # include <__support/win32/locale_win32.h> -# include #elif defined(_AIX) || defined(__MVS__) # include <__support/ibm/xlocale.h> #elif defined(__ANDROID__) diff --git a/libcxx/include/__memory/allocator_destructor.h b/libcxx/include/__memory/allocator_destructor.h new file mode 100644 index 0000000000000..623ad8ad800a1 --- /dev/null +++ b/libcxx/include/__memory/allocator_destructor.h @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MEMORY_ALLOCATOR_DESTRUCTOR_H +#define _LIBCPP___MEMORY_ALLOCATOR_DESTRUCTOR_H + +#include <__config> +#include <__memory/allocator_traits.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +class __allocator_destructor +{ + typedef _LIBCPP_NODEBUG allocator_traits<_Alloc> __alloc_traits; +public: + typedef _LIBCPP_NODEBUG typename __alloc_traits::pointer pointer; + typedef _LIBCPP_NODEBUG typename __alloc_traits::size_type size_type; +private: + _Alloc& __alloc_; + size_type __s_; +public: + _LIBCPP_INLINE_VISIBILITY __allocator_destructor(_Alloc& __a, size_type __s) + _NOEXCEPT + : __alloc_(__a), __s_(__s) {} + _LIBCPP_INLINE_VISIBILITY + void operator()(pointer __p) _NOEXCEPT + {__alloc_traits::deallocate(__alloc_, __p, __s_);} +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___MEMORY_ALLOCATOR_DESTRUCTOR_H diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 238d765f3ebb0..57051dfde3027 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -21,6 +21,7 @@ #include <__memory/addressof.h> #include <__memory/allocation_guard.h> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/auto_ptr.h> #include <__memory/compressed_pair.h> @@ -42,32 +43,12 @@ # include #endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif _LIBCPP_BEGIN_NAMESPACE_STD -template -class __allocator_destructor -{ - typedef _LIBCPP_NODEBUG allocator_traits<_Alloc> __alloc_traits; -public: - typedef _LIBCPP_NODEBUG typename __alloc_traits::pointer pointer; - typedef _LIBCPP_NODEBUG typename __alloc_traits::size_type size_type; -private: - _Alloc& 
__alloc_; - size_type __s_; -public: - _LIBCPP_INLINE_VISIBILITY __allocator_destructor(_Alloc& __a, size_type __s) - _NOEXCEPT - : __alloc_(__a), __s_(__s) {} - _LIBCPP_INLINE_VISIBILITY - void operator()(pointer __p) _NOEXCEPT - {__alloc_traits::deallocate(__alloc_, __p, __s_);} -}; - // NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively) // should be sufficient for thread safety. // See https://llvm.org/PR22803 diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index 1754baacd76ab..823487318ee9b 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -22,7 +22,6 @@ #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> -#include <__memory/shared_ptr.h> #include <__memory/swap_allocator.h> #include <__utility/forward.h> #include <__utility/move.h> diff --git a/libcxx/include/any b/libcxx/include/any index 763e3a1815d08..ec5171ff714e0 100644 --- a/libcxx/include/any +++ b/libcxx/include/any @@ -84,8 +84,8 @@ namespace std { #include <__availability> #include <__config> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> -#include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> #include <__utility/forward.h> #include <__utility/in_place.h> @@ -699,7 +699,9 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include +# include # include # include # include diff --git a/libcxx/include/codecvt b/libcxx/include/codecvt index c6445b115cb73..ce378c8d1c6b2 100644 --- a/libcxx/include/codecvt +++ b/libcxx/include/codecvt @@ -554,7 +554,18 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include 
#endif #endif // _LIBCPP_CODECVT diff --git a/libcxx/include/deque b/libcxx/include/deque index 45e1a68d3a13f..989b64d7a109e 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -176,6 +176,7 @@ template #include <__iterator/next.h> #include <__iterator/prev.h> #include <__iterator/reverse_iterator.h> +#include <__memory/allocator_destructor.h> #include <__memory/pointer_traits.h> #include <__memory/temp_value.h> #include <__memory/unique_ptr.h> @@ -2943,9 +2944,12 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include +# include # include +# include #endif #endif // _LIBCPP_DEQUE diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 4ef6f88ef2f4c..51643e9e2828c 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -190,10 +190,10 @@ template #include <__iterator/next.h> #include <__memory/addressof.h> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> -#include <__memory/shared_ptr.h> #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> #include <__memory_resource/polymorphic_allocator.h> @@ -1792,9 +1792,12 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include +# include # include +# include #endif #endif // _LIBCPP_FORWARD_LIST diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 1d2cb79b26b24..6b6e5b603c2e7 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -1745,7 +1745,12 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include +# include +# include +# include +# include #endif #endif // _LIBCPP_FSTREAM diff --git a/libcxx/include/future b/libcxx/include/future index 
5371de4fc43ae..e317e8d636ef7 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -367,6 +367,7 @@ template struct uses_allocator, Alloc>; #include <__chrono/time_point.h> #include <__config> #include <__memory/allocator_arg_t.h> +#include <__memory/allocator_destructor.h> #include <__memory/uses_allocator.h> #include <__utility/auto_cast.h> #include <__utility/forward.h> diff --git a/libcxx/include/ios b/libcxx/include/ios index ac4637bce1d57..e67b7d2b779a3 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -1040,6 +1040,15 @@ _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include +# include +# include +# include +# include +# include +# include +# include +# include #endif #endif // _LIBCPP_IOS diff --git a/libcxx/include/list b/libcxx/include/list index de2693e79aff5..4d68c5e72668b 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -196,10 +196,10 @@ template #include <__iterator/reverse_iterator.h> #include <__memory/addressof.h> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> -#include <__memory/shared_ptr.h> #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> #include <__memory_resource/polymorphic_allocator.h> @@ -2375,9 +2375,12 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include +# include # include +# include #endif #endif // _LIBCPP_LIST diff --git a/libcxx/include/locale b/libcxx/include/locale index fd31f0d87672f..8a330ae374d0f 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -4353,9 +4353,12 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include # include +# include +# include #endif #endif // 
_LIBCPP_LOCALE diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 79c7c3d354c8d..120cf1916e8cd 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -1031,6 +1031,7 @@ module std [system] { module allocation_guard { private header "__memory/allocation_guard.h" } module allocator { private header "__memory/allocator.h" } module allocator_arg_t { private header "__memory/allocator_arg_t.h" } + module allocator_destructor { private header "__memory/allocator_destructor.h" } module allocator_traits { private header "__memory/allocator_traits.h" } module assume_aligned { private header "__memory/assume_aligned.h" } module auto_ptr { private header "__memory/auto_ptr.h" } diff --git a/libcxx/include/ostream b/libcxx/include/ostream index 77ec87b35e4fe..1d943d8a8abd2 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -165,6 +165,7 @@ basic_ostream& operator<<(basic_ostream&, cons #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> #include #include @@ -1188,6 +1189,7 @@ extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_ostream; _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/regex b/libcxx/include/regex index 8118c944cae05..3c3a2e4a79486 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -773,6 +773,7 @@ typedef regex_token_iterator wsregex_token_iterator; #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/swap.h> +#include #include #include #include @@ -6858,8 +6859,12 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include +# include # include +# include +# include # include #endif diff --git a/libcxx/include/vector 
b/libcxx/include/vector index 8f9b8fb3d727b..d433f0c8c1969 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -3281,6 +3281,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include # include diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index 928ce0cd3600b..9b1120efd5234 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -427,6 +427,7 @@ END-SCRIPT #include <__memory/allocation_guard.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocation_guard.h'}} #include <__memory/allocator.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator.h'}} #include <__memory/allocator_arg_t.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator_arg_t.h'}} +#include <__memory/allocator_destructor.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator_destructor.h'}} #include <__memory/allocator_traits.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator_traits.h'}} #include <__memory/assume_aligned.h> // expected-error@*:* {{use of private header from outside its module: '__memory/assume_aligned.h'}} #include <__memory/auto_ptr.h> // expected-error@*:* {{use of private header from outside its module: '__memory/auto_ptr.h'}} diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 6e5ce44423c71..a5f334a04f7a7 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts chrono cstdint chrono ctime chrono limits @@ -610,17 +611,24 @@ optional typeinfo optional 
utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 837e22b85df79..6ad398a3b092c 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts chrono cstdint chrono ctime chrono limits @@ -611,17 +612,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index e1cbeeee647f0..2b13557fd2460 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts chrono cstdint chrono ctime chrono limits @@ -613,17 +614,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream 
streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index e1cbeeee647f0..2b13557fd2460 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -107,6 +107,7 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts chrono cstdint chrono ctime chrono limits @@ -613,17 +614,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index ead6b43f1bada..9f3d88d78c5ae 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -623,17 +623,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv index d896ed6cc26b8..b00cba3aa7a8f 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx2b.csv @@ -10,13 +10,11 @@ algorithm limits algorithm new algorithm type_traits algorithm 
version -any atomic any cstddef any cstdint any cstdlib any cstring any initializer_list -any iosfwd any limits any new any stdexcept @@ -103,21 +101,12 @@ chrono version cinttypes cstdint cmath type_traits cmath version -codecvt atomic codecvt cctype codecvt cstddef codecvt cstdint -codecvt cstdlib codecvt cstring -codecvt initializer_list -codecvt iosfwd -codecvt limits codecvt mutex -codecvt new -codecvt stdexcept codecvt string -codecvt type_traits -codecvt typeinfo codecvt version compare cmath compare cstddef @@ -161,20 +150,17 @@ ctgmath ccomplex ctgmath cmath cwchar cwctype cwctype cctype -deque atomic deque compare deque cstddef deque cstdint deque cstdlib deque cstring deque initializer_list -deque iosfwd deque limits deque new deque stdexcept deque tuple deque type_traits -deque typeinfo deque version exception cstddef exception cstdlib @@ -293,22 +279,18 @@ format string format string_view format type_traits format version -forward_list atomic forward_list compare forward_list cstddef forward_list cstdint forward_list cstdlib forward_list cstring forward_list initializer_list -forward_list iosfwd forward_list limits forward_list new forward_list stdexcept forward_list tuple forward_list type_traits -forward_list typeinfo forward_list version -fstream atomic fstream cctype fstream cstddef fstream cstdint @@ -317,13 +299,9 @@ fstream cstdlib fstream cstring fstream filesystem fstream initializer_list -fstream iosfwd fstream istream -fstream limits fstream mutex -fstream new fstream ostream -fstream stdexcept fstream string fstream type_traits fstream typeinfo @@ -364,18 +342,11 @@ ios atomic ios cctype ios cstddef ios cstdint -ios cstdlib ios cstring -ios initializer_list ios iosfwd -ios limits ios mutex -ios new -ios stdexcept ios string ios system_error -ios type_traits -ios typeinfo ios version iosfwd version iostream ios @@ -403,22 +374,18 @@ latch limits latch version limits type_traits limits version -list atomic list compare list cstddef list 
cstdint list cstdlib list cstring list initializer_list -list iosfwd list limits list new list stdexcept list tuple list type_traits -list typeinfo list version -locale atomic locale cctype locale cstddef locale cstdint @@ -432,11 +399,9 @@ locale iosfwd locale limits locale mutex locale new -locale stdexcept locale streambuf locale string locale type_traits -locale typeinfo locale version map compare map cstddef @@ -510,16 +475,22 @@ optional new optional stdexcept optional type_traits optional version +ostream atomic ostream bitset ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue cstddef @@ -562,7 +533,6 @@ ratio climits ratio cstdint ratio type_traits ratio version -regex atomic regex cctype regex compare regex cstddef @@ -571,14 +541,11 @@ regex cstdlib regex cstring regex deque regex initializer_list -regex iosfwd regex limits regex mutex -regex new regex stdexcept regex string regex type_traits -regex typeinfo regex vector regex version scoped_allocator cstddef @@ -764,7 +731,6 @@ variant new variant tuple variant type_traits variant version -vector atomic vector climits vector compare vector cstddef @@ -778,5 +744,4 @@ vector new vector stdexcept vector tuple vector type_traits -vector typeinfo vector version From 23831f0efe4c7585b4b5abba731aa56c29aa21f1 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sat, 5 Nov 2022 20:26:22 +0000 Subject: [PATCH 334/516] [gn build] Port 59ef4b3686e4 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index c9234ab175973..e7976a0f9b231 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ 
-465,6 +465,7 @@ if (current_toolchain == default_toolchain) { "__memory/allocation_guard.h", "__memory/allocator.h", "__memory/allocator_arg_t.h", + "__memory/allocator_destructor.h", "__memory/allocator_traits.h", "__memory/assume_aligned.h", "__memory/auto_ptr.h", From d1829c308da9c2adc46640a960c105b573db6555 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 5 Nov 2022 22:35:54 +0100 Subject: [PATCH 335/516] Add support of the next Ubuntu (Ubuntu 23.04 - Lunar Lobster) --- clang/include/clang/Driver/Distro.h | 3 ++- clang/lib/Driver/Distro.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index 01d66b30b0386..1aaf93ddb7c43 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -76,6 +76,7 @@ class Distro { UbuntuImpish, UbuntuJammy, UbuntuKinetic, + UbuntuLunar, UnknownDistro }; @@ -127,7 +128,7 @@ class Distro { } bool IsUbuntu() const { - return DistroVal >= UbuntuHardy && DistroVal <= UbuntuKinetic; + return DistroVal >= UbuntuHardy && DistroVal <= UbuntuLunar; } bool IsAlpineLinux() const { return DistroVal == AlpineLinux; } diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 1898667279cc3..87a0c5a585115 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -92,6 +92,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) { .Case("impish", Distro::UbuntuImpish) .Case("jammy", Distro::UbuntuJammy) .Case("kinetic", Distro::UbuntuKinetic) + .Case("lunar", Distro::UbuntuLunar) .Default(Distro::UnknownDistro); return Version; } From 38c07d90c95ccfabe64ba25b45de76af49230018 Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Sat, 5 Nov 2022 14:36:40 -0700 Subject: [PATCH 336/516] Change a 'default_triple' to 'object-emission' as pointed out in post-commit review on D136612 by Igor Kudrin. 
--- llvm/test/MC/AsmParser/layout-interdependency.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/MC/AsmParser/layout-interdependency.s b/llvm/test/MC/AsmParser/layout-interdependency.s index ca766573426a2..f26149ced766f 100644 --- a/llvm/test/MC/AsmParser/layout-interdependency.s +++ b/llvm/test/MC/AsmParser/layout-interdependency.s @@ -1,5 +1,5 @@ # RUN: not llvm-mc --filetype=obj %s -o /dev/null 2>&1 | FileCheck %s -# REQUIRES: default_triple +# REQUIRES: object-emission fct_end: From f530e6e34ee19cfc6f08179879dab64207d51237 Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Thu, 3 Nov 2022 18:40:28 -0700 Subject: [PATCH 337/516] [mlir] Drop `const` from `SymbolTableCollection::lookupSymbolIn` This function didn't work because it was marked const but both functions it could dispatch to are not. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D137380 --- mlir/include/mlir/IR/SymbolTable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/IR/SymbolTable.h b/mlir/include/mlir/IR/SymbolTable.h index 3cc48000e1944..a24693575f033 100644 --- a/mlir/include/mlir/IR/SymbolTable.h +++ b/mlir/include/mlir/IR/SymbolTable.h @@ -249,7 +249,7 @@ class SymbolTableCollection { Operation *lookupSymbolIn(Operation *symbolTableOp, StringAttr symbol); Operation *lookupSymbolIn(Operation *symbolTableOp, SymbolRefAttr name); template - T lookupSymbolIn(Operation *symbolTableOp, NameT &&name) const { + T lookupSymbolIn(Operation *symbolTableOp, NameT &&name) { return dyn_cast_or_null( lookupSymbolIn(symbolTableOp, std::forward(name))); } From a782922708af4e80bc9eaba977704420b6c765d9 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Sat, 5 Nov 2022 16:35:25 -0700 Subject: [PATCH 338/516] [mlir][SubElementInterfaces] Prefer calling the derived get if possible This allows for better supporting attributes/types that override the default builders. 
--- mlir/include/mlir/IR/SubElementInterfaces.h | 26 ++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/IR/SubElementInterfaces.h b/mlir/include/mlir/IR/SubElementInterfaces.h index ed387eb9a122e..07d246aafbfa7 100644 --- a/mlir/include/mlir/IR/SubElementInterfaces.h +++ b/mlir/include/mlir/IR/SubElementInterfaces.h @@ -220,6 +220,8 @@ template struct is_tuple : public std::false_type {}; template struct is_tuple> : public std::true_type {}; +template +using has_get_method = decltype(T::get(std::declval()...)); /// This function provides the underlying implementation for the /// SubElementInterface walk method, using the key type of the derived @@ -239,6 +241,23 @@ void walkImmediateSubElementsImpl(T derived, } } +/// This function invokes the proper `get` method for a type `T` with the given +/// values. +template +T constructSubElementReplacement(MLIRContext *ctx, Ts &&...params) { + // Prefer a direct `get` method if one exists. + if constexpr (llvm::is_detected::value) { + (void)ctx; + return T::get(std::forward(params)...); + } else if constexpr (llvm::is_detected::value) { + return T::get(ctx, std::forward(params)...); + } else { + // Otherwise, pass to the base get. + return T::Base::get(ctx, std::forward(params)...); + } +} + /// This function provides the underlying implementation for the /// SubElementInterface replace method, using the key type of the derived /// attribute/type to interact with the individual parameters. 
@@ -260,12 +279,13 @@ T replaceImmediateSubElementsImpl(T derived, ArrayRef &replAttrs, if constexpr (is_tuple::value) { return std::apply( [&](auto &&...params) { - return T::Base::get(derived.getContext(), - std::forward(params)...); + return constructSubElementReplacement( + derived.getContext(), + std::forward(params)...); }, newKey); } else { - return T::Base::get(derived.getContext(), newKey); + return constructSubElementReplacement(derived.getContext(), newKey); } } } From 0e18d5ed21c829220849bb2adcc17b2f8077bbae Mon Sep 17 00:00:00 2001 From: River Riddle Date: Sat, 5 Nov 2022 16:36:17 -0700 Subject: [PATCH 339/516] [mlir][SubElements] Re-add null guards to better enable downstream adoption We used to allow this, and it can break clients that still rely on it. --- mlir/lib/IR/SubElementInterfaces.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mlir/lib/IR/SubElementInterfaces.cpp b/mlir/lib/IR/SubElementInterfaces.cpp index ae0223f0936ef..fd05b9d01eea4 100644 --- a/mlir/lib/IR/SubElementInterfaces.cpp +++ b/mlir/lib/IR/SubElementInterfaces.cpp @@ -27,6 +27,11 @@ static void walkSubElementsImpl(InterfaceT interface, DenseSet &visitedTypes) { interface.walkImmediateSubElements( [&](Attribute attr) { + // Guard against potentially null inputs. This removes the need for the + // derived attribute/type to do it. + if (!attr) + return; + // Avoid infinite recursion when visiting sub attributes later, if this // is a mutable attribute. if (LLVM_UNLIKELY(attr.hasTrait())) { @@ -43,6 +48,11 @@ static void walkSubElementsImpl(InterfaceT interface, walkAttrsFn(attr); }, [&](Type type) { + // Guard against potentially null inputs. This removes the need for the + // derived attribute/type to do it. + if (!type) + return; + // Avoid infinite recursion when visiting sub types later, if this // is a mutable type. 
if (LLVM_UNLIKELY(type.hasTrait())) { @@ -93,6 +103,10 @@ static void updateSubElementImpl( return; newElements.push_back(element); + // Guard against potentially null inputs. We always map null to null. + if (!element) + return; + // Check for an existing mapping for this element, and walk it if we haven't // yet. T *mappedElement = &visited[element]; From 05a165bf0434e135c6a853731f92669b4b7fda98 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Sat, 5 Nov 2022 20:58:41 -0700 Subject: [PATCH 340/516] [llvm/cmake/config-ix.cmake] If `LD64_EXECUTABLE` is already set, avoid the need to look up `ld64` This provides option to set the `LD64_EXECUTABLE` variable to a path at CMake configure time directly. --- llvm/cmake/config-ix.cmake | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 74ffaf7bfdec7..ba85da6a8c3d0 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -622,15 +622,17 @@ if(CMAKE_GENERATOR MATCHES "Ninja" AND endif() if(CMAKE_HOST_APPLE AND APPLE) - if(NOT CMAKE_XCRUN) - find_program(CMAKE_XCRUN NAMES xcrun) - endif() - if(CMAKE_XCRUN) - execute_process(COMMAND ${CMAKE_XCRUN} -find ld - OUTPUT_VARIABLE LD64_EXECUTABLE - OUTPUT_STRIP_TRAILING_WHITESPACE) - else() - find_program(LD64_EXECUTABLE NAMES ld DOC "The ld64 linker") + if(NOT LD64_EXECUTABLE) + if(NOT CMAKE_XCRUN) + find_program(CMAKE_XCRUN NAMES xcrun) + endif() + if(CMAKE_XCRUN) + execute_process(COMMAND ${CMAKE_XCRUN} -find ld + OUTPUT_VARIABLE LD64_EXECUTABLE + OUTPUT_STRIP_TRAILING_WHITESPACE) + else() + find_program(LD64_EXECUTABLE NAMES ld DOC "The ld64 linker") + endif() endif() if(LD64_EXECUTABLE) From b5626ae9751f0d82aa04791a21689b289721738e Mon Sep 17 00:00:00 2001 From: Arnab Dutta Date: Sun, 6 Nov 2022 12:29:10 +0530 Subject: [PATCH 341/516] [MLIR] Fix bug in the method constructing semi affine expression from flattened form Set proper offset to the second 
element of the index pair when either lhs or rhs of a local expression is a dimensional identifier, so that we do not have same index values for more than one local expression. Reviewed By: springerm, hanchung Differential Revision: https://reviews.llvm.org/D137389 --- mlir/lib/IR/AffineExpr.cpp | 32 ++++++++++--------- .../Dialect/Affine/simplify-structures.mlir | 10 ++++++ 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index e0f45470bf3bd..00778cd47fdb0 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -986,18 +986,9 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, // constant coefficient corresponding to the indices in `coefficients` map, // and affine expression corresponding to indices in `indexToExprMap` map. - for (unsigned j = 0; j < numDims; ++j) { - if (flatExprs[j] == 0) - continue; - // For dimensional expressions we set the index as , as we want dimensional expressions to appear before - // symbolic ones and products of dimensional and symbolic expressions - // having the dimension with the same position number. - std::pair indexEntry(j, -1); - addEntry(indexEntry, flatExprs[j], getAffineDimExpr(j, context)); - } // Ensure we do not have duplicate keys in `indexToExpr` map. - unsigned offset = 0; + unsigned offsetSym = 0; + signed offsetDim = -1; for (unsigned j = numDims; j < numDims + numSymbols; ++j) { if (flatExprs[j] == 0) continue; @@ -1006,7 +997,7 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, // as we want symbolic expressions with the same positional number to // appear after dimensional expressions having the same positional number. 
std::pair indexEntry( - j - numDims, std::max(numDims, numSymbols) + offset++); + j - numDims, std::max(numDims, numSymbols) + offsetSym++); addEntry(indexEntry, flatExprs[j], getAffineSymbolExpr(j - numDims, context)); } @@ -1038,13 +1029,13 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, // constructing. When rhs is constant, we place 0 in place of keyB. if (lhs.isa()) { lhsPos = lhs.cast().getPosition(); - std::pair indexEntry(lhsPos, -1); + std::pair indexEntry(lhsPos, offsetDim--); addEntry(indexEntry, flatExprs[numDims + numSymbols + it.index()], expr); } else { lhsPos = lhs.cast().getPosition(); std::pair indexEntry( - lhsPos, std::max(numDims, numSymbols) + offset++); + lhsPos, std::max(numDims, numSymbols) + offsetSym++); addEntry(indexEntry, flatExprs[numDims + numSymbols + it.index()], expr); } @@ -1066,12 +1057,23 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, lhsPos = lhs.cast().getPosition(); rhsPos = rhs.cast().getPosition(); std::pair indexEntry( - lhsPos, std::max(numDims, numSymbols) + offset++); + lhsPos, std::max(numDims, numSymbols) + offsetSym++); addEntry(indexEntry, flatExprs[numDims + numSymbols + it.index()], expr); } addedToMap[it.index()] = true; } + for (unsigned j = 0; j < numDims; ++j) { + if (flatExprs[j] == 0) + continue; + // For dimensional expressions we set the index as , as we want dimensional expressions to appear before + // symbolic ones and products of dimensional and symbolic expressions + // having the dimension with the same position number. + std::pair indexEntry(j, offsetDim--); + addEntry(indexEntry, flatExprs[j], getAffineDimExpr(j, context)); + } + // Constructing the simplified semi-affine sum of product/division/mod // expression from the flattened form in the desired sorted order of indices // of the various individual product/division/mod expressions. 
diff --git a/mlir/test/Dialect/Affine/simplify-structures.mlir b/mlir/test/Dialect/Affine/simplify-structures.mlir index 903d11ea865fe..2c693ea1551c0 100644 --- a/mlir/test/Dialect/Affine/simplify-structures.mlir +++ b/mlir/test/Dialect/Affine/simplify-structures.mlir @@ -557,3 +557,13 @@ func.func @semiaffine_modulo(%arg0: index) -> index { // CHECK: affine.apply #[[$MAP]]()[%{{.*}}] return %a : index } + +// ----- + +// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0, s1, s2] -> (s2 mod 2 + (s1 floordiv 2) * 2 + ((s2 floordiv 2) * s0) * 2)> +// CHECK-LABEL: func @semiaffine_modulo_dim +func.func @semiaffine_modulo_dim(%arg0: index, %arg1: index, %arg2: index) -> index { + %a = affine.apply affine_map<(d0)[s0, s1] -> (((d0 floordiv 2) * s0 + s1 floordiv 2) * 2 + d0 mod 2)> (%arg0)[%arg1, %arg2] + //CHECK: affine.apply #[[$MAP]]()[%{{.*}}, %{{.*}}, %{{.*}}] + return %a : index +} From a8d93783f37c042ace67069ae4ca6f8fd849c2d0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 5 Nov 2022 23:52:42 -0800 Subject: [PATCH 342/516] [mlir] Fix a warning This patch fixes: llvm-project/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp:1855:17: error: comparison of integers of different signs: 'int64_t' (aka 'long') and 'size_t' (aka 'unsigned long') [-Werror,-Wsign-compare] --- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 6377a68bc3c5d..3069ddce0f5be 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1852,7 +1852,7 @@ LogicalResult BroadcastOp::verify() { auto inputShape = inputType.getShape(); auto initShape = initType.getShape(); - if (inputRank != dimensionsRef.size()) + if ((size_t)inputRank != dimensionsRef.size()) return emitOpError() << "input rank does match the number of dimensions. 
expected: " << inputRank << ", got: " << dimensionsRef.size(); From b6be37b0be5e1b524456c375700c76b7dae5171c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sun, 30 Oct 2022 07:37:15 +0100 Subject: [PATCH 343/516] [llvm] [cmake] Set EXCLUDE_FROM_ALL on gtest and TestingSupport Exclude building googletest and LLVMTestingSupport libraries from the `all` target. If unittests are being built, these libraries will be built as a dependency anyway. If they are not being built, building them makes little sense as they are not installed or used otherwise. This will also make standalone builds of other projects easier, as it makes it possible to include these directories without having to cover them with additional conditions to prevent them from being built unconditionally. Differential Revision: https://reviews.llvm.org/D137035 --- llvm/lib/Testing/Support/CMakeLists.txt | 4 ++++ llvm/utils/unittest/CMakeLists.txt | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/llvm/lib/Testing/Support/CMakeLists.txt b/llvm/lib/Testing/Support/CMakeLists.txt index 81774eb171caa..385d8eca2daa7 100644 --- a/llvm/lib/Testing/Support/CMakeLists.txt +++ b/llvm/lib/Testing/Support/CMakeLists.txt @@ -1,3 +1,7 @@ +# Do not build unittest libraries automatically, they will be pulled in +# by unittests if these are built. +set(EXCLUDE_FROM_ALL ON) + add_llvm_library(LLVMTestingSupport Annotations.cpp Error.cpp diff --git a/llvm/utils/unittest/CMakeLists.txt b/llvm/utils/unittest/CMakeLists.txt index 0e54e0e57c358..302e9e0e84407 100644 --- a/llvm/utils/unittest/CMakeLists.txt +++ b/llvm/utils/unittest/CMakeLists.txt @@ -38,6 +38,10 @@ if (HAVE_LIBPTHREAD) list(APPEND LIBS pthread) endif() +# Do not build unittest libraries automatically, they will be pulled in +# by unittests if these are built. 
+set(EXCLUDE_FROM_ALL ON) + add_llvm_library(llvm_gtest googletest/src/gtest-all.cc googlemock/src/gmock-all.cc From 6106816459463072b862d4406f7e010a5b9cddb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sun, 6 Nov 2022 09:06:16 +0100 Subject: [PATCH 344/516] Revert "[llvm] [cmake] Set EXCLUDE_FROM_ALL on gtest and TestingSupport" This reverts commit b6be37b0be5e1b524456c375700c76b7dae5171c. Still breaks flang. --- llvm/lib/Testing/Support/CMakeLists.txt | 4 ---- llvm/utils/unittest/CMakeLists.txt | 4 ---- 2 files changed, 8 deletions(-) diff --git a/llvm/lib/Testing/Support/CMakeLists.txt b/llvm/lib/Testing/Support/CMakeLists.txt index 385d8eca2daa7..81774eb171caa 100644 --- a/llvm/lib/Testing/Support/CMakeLists.txt +++ b/llvm/lib/Testing/Support/CMakeLists.txt @@ -1,7 +1,3 @@ -# Do not build unittest libraries automatically, they will be pulled in -# by unittests if these are built. -set(EXCLUDE_FROM_ALL ON) - add_llvm_library(LLVMTestingSupport Annotations.cpp Error.cpp diff --git a/llvm/utils/unittest/CMakeLists.txt b/llvm/utils/unittest/CMakeLists.txt index 302e9e0e84407..0e54e0e57c358 100644 --- a/llvm/utils/unittest/CMakeLists.txt +++ b/llvm/utils/unittest/CMakeLists.txt @@ -38,10 +38,6 @@ if (HAVE_LIBPTHREAD) list(APPEND LIBS pthread) endif() -# Do not build unittest libraries automatically, they will be pulled in -# by unittests if these are built. 
-set(EXCLUDE_FROM_ALL ON) - add_llvm_library(llvm_gtest googletest/src/gtest-all.cc googlemock/src/gmock-all.cc From bb635672b9d285db57d20b977e2603dded87df4c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 00:10:02 -0800 Subject: [PATCH 345/516] [llvm] Remove redundaunt typename (NFC) --- llvm/include/llvm/ADT/Sequence.h | 12 ++++++------ llvm/include/llvm/Support/TypeSize.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index 88a6fa9205983..1153352d8b24f 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -125,8 +125,8 @@ template bool canTypeFitValue(const U Value) { // - its internal representation overflows. struct CheckedInt { // Integral constructor, asserts if Value cannot be represented as intmax_t. - template ::value, bool> = 0> + template ::value, bool> = 0> static CheckedInt from(Integral FromValue) { if (!canTypeFitValue(FromValue)) assertOutOfBounds(); @@ -137,7 +137,7 @@ struct CheckedInt { // Enum constructor, asserts if Value cannot be represented as intmax_t. template ::value, bool> = 0> + std::enable_if_t::value, bool> = 0> static CheckedInt from(Enum FromValue) { using type = std::underlying_type_t; return from(static_cast(FromValue)); @@ -162,8 +162,8 @@ struct CheckedInt { } // Convert to integral, asserts if Value cannot be represented as Integral. - template ::value, bool> = 0> + template ::value, bool> = 0> Integral to() const { if (!canTypeFitValue(Value)) assertOutOfBounds(); @@ -173,7 +173,7 @@ struct CheckedInt { // Convert to enum, asserts if Value cannot be represented as Enum's // underlying type. 
template ::value, bool> = 0> + std::enable_if_t::value, bool> = 0> Enum to() const { using type = std::underlying_type_t; return Enum(to()); diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 0777005643a71..9cf2e873d7189 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -93,7 +93,7 @@ class LinearPolyBase { } template - friend typename std::enable_if_t::value, LeafTy> + friend std::enable_if_t::value, LeafTy> operator-(const LeafTy &LHS) { LeafTy Copy = LHS; return Copy *= -1; From ad980b570217b6906e08c7cfbc67f5ee0ec35714 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Sun, 6 Nov 2022 09:56:19 +0100 Subject: [PATCH 346/516] [flang] Fix controlSuccessor chain for select rank construct Represent the select rank statement + select rank case statement the same way the select case statement and case statement are represented. controlSuccessor was not correctly attributed to the next type guard stmt. Similar to D137460 for select type construct. 
Reviewed By: vdonaldson Differential Revision: https://reviews.llvm.org/D137490 --- flang/lib/Lower/PFTBuilder.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index 8f87e96c986ca..19b2512dc9224 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -900,8 +900,13 @@ class PFTBuilder { }, [&](const parser::SelectRankStmt &s) { insertConstructName(s, parentConstruct); + lastConstructStmtEvaluation = &eval; + }, + [&](const parser::SelectRankCaseStmt &) { + eval.isNewBlock = true; + lastConstructStmtEvaluation->controlSuccessor = &eval; + lastConstructStmtEvaluation = &eval; }, - [&](const parser::SelectRankCaseStmt &) { eval.isNewBlock = true; }, [&](const parser::SelectTypeStmt &s) { insertConstructName(s, parentConstruct); lastConstructStmtEvaluation = &eval; From 656f1d8b74df5d3f5f2bc75a5f2565df48340757 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 6 Nov 2022 11:40:08 +0000 Subject: [PATCH 347/516] Revert "[SLP] Extend reordering data of tree entry to support PHI nodes" This reverts commit 87a20868eb2043420d48f591c3437472f7137834 as it has problems with scalable vectors and use-list orders. Test to follow. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 156 ++++++------------ .../AMDGPU/phi-result-use-order.ll | 10 +- 2 files changed, 51 insertions(+), 115 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f874cfca2876c..ba44d4a77ca3a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3795,49 +3795,6 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) { return None; } -/// Check if two insertelement instructions are from the same buildvector. 
-static bool areTwoInsertFromSameBuildVector( - InsertElementInst *VU, InsertElementInst *V, - function_ref GetBaseOperand) { - // Instructions must be from the same basic blocks. - if (VU->getParent() != V->getParent()) - return false; - // Checks if 2 insertelements are from the same buildvector. - if (VU->getType() != V->getType()) - return false; - // Multiple used inserts are separate nodes. - if (!VU->hasOneUse() && !V->hasOneUse()) - return false; - auto *IE1 = VU; - auto *IE2 = V; - unsigned Idx1 = *getInsertIndex(IE1); - unsigned Idx2 = *getInsertIndex(IE2); - // Go through the vector operand of insertelement instructions trying to find - // either VU as the original vector for IE2 or V as the original vector for - // IE1. - do { - if (IE2 == VU) - return VU->hasOneUse(); - if (IE1 == V) - return V->hasOneUse(); - if (IE1) { - if ((IE1 != VU && !IE1->hasOneUse()) || - getInsertIndex(IE1).value_or(Idx2) == Idx2) - IE1 = nullptr; - else - IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); - } - if (IE2) { - if ((IE2 != V && !IE2->hasOneUse()) || - getInsertIndex(IE2).value_or(Idx1) == Idx1) - IE2 = nullptr; - else - IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); - } - } while (IE1 || IE2); - return false; -} - Optional BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // No need to reorder if need to shuffle reuses, still need to shuffle the @@ -3901,58 +3858,6 @@ Optional BoUpSLP::getReorderingData(const TreeEntry &TE, (TopToBottom && isa(TE.getMainOp()))) && !TE.isAltShuffle()) return TE.ReorderIndices; - if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) { - auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) { - if (V1->user_empty() || V2->user_empty()) - return false; - auto *FirstUserOfPhi1 = cast(*V1->user_begin()); - auto *FirstUserOfPhi2 = cast(*V2->user_begin()); - if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *IE2 = dyn_cast(FirstUserOfPhi2)) { - if (!areTwoInsertFromSameBuildVector( - IE1, IE2, - 
[](InsertElementInst *II) { return II->getOperand(0); })) - return false; - Optional Idx1 = getInsertIndex(IE1); - Optional Idx2 = getInsertIndex(IE2); - if (Idx1 == None || Idx2 == None) - return false; - return *Idx1 < *Idx2; - } - if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *EE2 = dyn_cast(FirstUserOfPhi2)) { - if (EE1->getOperand(0) != EE2->getOperand(0)) - return false; - Optional Idx1 = getExtractIndex(EE1); - Optional Idx2 = getExtractIndex(EE2); - if (Idx1 == None || Idx2 == None) - return false; - return *Idx1 < *Idx2; - } - return false; - }; - auto IsIdentityOrder = [](const OrdersType &Order) { - for (unsigned Idx : seq(0, Order.size())) - if (Idx != Order[Idx]) - return false; - return true; - }; - if (!TE.ReorderIndices.empty()) - return TE.ReorderIndices; - DenseMap PhiToId; - SmallVector Phis; - OrdersType ResOrder(TE.Scalars.size()); - for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) { - PhiToId[TE.Scalars[Id]] = Id; - Phis.push_back(TE.Scalars[Id]); - } - llvm::stable_sort(Phis, PHICompare); - for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id) - ResOrder[Id] = PhiToId[Phis[Id]]; - if (IsIdentityOrder(ResOrder)) - return {}; - return ResOrder; - } if (TE.State == TreeEntry::NeedToGather) { // TODO: add analysis of other gather nodes with extractelement // instructions and other values/instructions, not only undefs. @@ -4030,9 +3935,6 @@ void BoUpSLP::reorderTopToBottom() { // their ordering. DenseMap GathersToOrders; - // Phi nodes can have preferred ordering based on their result users - DenseMap PhisToOrders; - // AltShuffles can also have a preferred ordering that leads to fewer // instructions, e.g., the addsub instruction in x86. DenseMap AltShufflesToOrders; @@ -4047,7 +3949,7 @@ void BoUpSLP::reorderTopToBottom() { // extracts. 
for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries, &GathersToOrders, &ExternalUserReorderMap, - &AltShufflesToOrders, &PhisToOrders]( + &AltShufflesToOrders]( const std::unique_ptr &TE) { // Look for external users that will probably be vectorized. SmallVector ExternalUserReorderIndices = @@ -4104,9 +4006,6 @@ void BoUpSLP::reorderTopToBottom() { VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get()); if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty()) GathersToOrders.try_emplace(TE.get(), *CurrentOrder); - if (TE->State == TreeEntry::Vectorize && - TE->getOpcode() == Instruction::PHI) - PhisToOrders.try_emplace(TE.get(), *CurrentOrder); } }); @@ -4132,8 +4031,8 @@ void BoUpSLP::reorderTopToBottom() { if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE)) continue; // Count number of orders uses. - const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders, - &PhisToOrders]() -> const OrdersType & { + const auto &Order = [OpTE, &GathersToOrders, + &AltShufflesToOrders]() -> const OrdersType & { if (OpTE->State == TreeEntry::NeedToGather || !OpTE->ReuseShuffleIndices.empty()) { auto It = GathersToOrders.find(OpTE); @@ -4145,12 +4044,6 @@ void BoUpSLP::reorderTopToBottom() { if (It != AltShufflesToOrders.end()) return It->second; } - if (OpTE->State == TreeEntry::Vectorize && - isa(OpTE->getMainOp())) { - auto It = PhisToOrders.find(OpTE); - if (It != PhisToOrders.end()) - return It->second; - } return OpTE->ReorderIndices; }(); // First consider the order of the external scalar users. @@ -7245,6 +7138,49 @@ InstructionCost BoUpSLP::getSpillCost() const { return Cost; } +/// Check if two insertelement instructions are from the same buildvector. +static bool areTwoInsertFromSameBuildVector( + InsertElementInst *VU, InsertElementInst *V, + function_ref GetBaseOperand) { + // Instructions must be from the same basic blocks. 
+ if (VU->getParent() != V->getParent()) + return false; + // Checks if 2 insertelements are from the same buildvector. + if (VU->getType() != V->getType()) + return false; + // Multiple used inserts are separate nodes. + if (!VU->hasOneUse() && !V->hasOneUse()) + return false; + auto *IE1 = VU; + auto *IE2 = V; + unsigned Idx1 = *getInsertIndex(IE1); + unsigned Idx2 = *getInsertIndex(IE2); + // Go through the vector operand of insertelement instructions trying to find + // either VU as the original vector for IE2 or V as the original vector for + // IE1. + do { + if (IE2 == VU) + return VU->hasOneUse(); + if (IE1 == V) + return V->hasOneUse(); + if (IE1) { + if ((IE1 != VU && !IE1->hasOneUse()) || + getInsertIndex(IE1).value_or(Idx2) == Idx2) + IE1 = nullptr; + else + IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); + } + if (IE2) { + if ((IE2 != V && !IE2->hasOneUse()) || + getInsertIndex(IE2).value_or(Idx1) == Idx1) + IE2 = nullptr; + else + IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); + } + } while (IE1 || IE2); + return false; +} + /// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the /// buildvector sequence. 
static bool isFirstInsertElement(const InsertElementInst *IE1, diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index f870fb3a9bc1f..5dff4be7493f9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -63,8 +63,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] @@ -73,15 +73,15 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 ; CHECK-NEXT: 
[[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] ; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> ; CHECK-NEXT: ret <4 x half> [[TMP12]] From 0e9dfff37ef8f29cdda716d5ccb9d8e74d2a48fe Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 6 Nov 2022 12:06:12 +0000 Subject: [PATCH 348/516] [SLP][AArch64] Add a test case for SLP phi ordering of scalable vectors. NFC --- .../AArch64/phi-use-order-scalable.ll | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/phi-use-order-scalable.ll diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/phi-use-order-scalable.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/phi-use-order-scalable.ll new file mode 100644 index 0000000000000..8e30d1865bf61 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/phi-use-order-scalable.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-arm-none-eabi" + +define @scalable(i1 %c, i32 %srcALen, i32 %srcBLen) { +; CHECK-LABEL: @scalable( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END12:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[IF_END12]] +; CHECK: 
if.end12: +; CHECK-NEXT: [[SRCALEN_ADDR_0:%.*]] = phi i32 [ [[SRCALEN:%.*]], [[IF_THEN]] ], [ [[SRCBLEN:%.*]], [[IF_ELSE]] ] +; CHECK-NEXT: [[SRCBLEN_ADDR_0:%.*]] = phi i32 [ [[SRCBLEN]], [[IF_THEN]] ], [ [[SRCALEN]], [[IF_ELSE]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT78:%.*]] = insertelement poison, i32 [[SRCBLEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT82:%.*]] = insertelement poison, i32 [[SRCALEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT83:%.*]] = shufflevector [[BROADCAST_SPLATINSERT82]], poison, zeroinitializer +; CHECK-NEXT: ret [[BROADCAST_SPLAT83]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %if.end12 + +if.else: ; preds = %entry + br label %if.end12 + +if.end12: ; preds = %if.else, %if.then + %srcALen.addr.0 = phi i32 [ %srcALen, %if.then ], [ %srcBLen, %if.else ] + %srcBLen.addr.0 = phi i32 [ %srcBLen, %if.then ], [ %srcALen, %if.else ] + %broadcast.splatinsert78 = insertelement poison, i32 %srcBLen.addr.0, i64 0 + %broadcast.splatinsert82 = insertelement poison, i32 %srcALen.addr.0, i64 0 + %broadcast.splat83 = shufflevector %broadcast.splatinsert82, poison, zeroinitializer + ret %broadcast.splat83 +} + +define @multiuse(i1 %c, i32 %srcALen, i32 %srcBLen) { +; CHECK-LABEL: @multiuse( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END12:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[IF_END12]] +; CHECK: if.end12: +; CHECK-NEXT: [[SRCALEN_ADDR_0:%.*]] = phi i32 [ [[SRCALEN:%.*]], [[IF_THEN]] ], [ [[SRCBLEN:%.*]], [[IF_ELSE]] ] +; CHECK-NEXT: [[SRCBLEN_ADDR_0:%.*]] = phi i32 [ [[SRCBLEN]], [[IF_THEN]] ], [ [[SRCALEN]], [[IF_ELSE]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT78:%.*]] = insertelement poison, i32 [[SRCBLEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT82:%.*]] = insertelement poison, i32 [[SRCALEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT83:%.*]] = 
shufflevector [[BROADCAST_SPLATINSERT82]], poison, zeroinitializer +; CHECK-NEXT: [[X:%.*]] = add i32 [[SRCALEN_ADDR_0]], [[SRCBLEN_ADDR_0]] +; CHECK-NEXT: [[BROADCAST_SPLAT84:%.*]] = insertelement [[BROADCAST_SPLAT83]], i32 [[SRCBLEN_ADDR_0]], i64 1 +; CHECK-NEXT: ret [[BROADCAST_SPLAT84]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %if.end12 + +if.else: ; preds = %entry + br label %if.end12 + +if.end12: ; preds = %if.else, %if.then + %srcALen.addr.0 = phi i32 [ %srcALen, %if.then ], [ %srcBLen, %if.else ] + %srcBLen.addr.0 = phi i32 [ %srcBLen, %if.then ], [ %srcALen, %if.else ] + %broadcast.splatinsert78 = insertelement poison, i32 %srcBLen.addr.0, i64 0 + %broadcast.splatinsert82 = insertelement poison, i32 %srcALen.addr.0, i64 0 + %broadcast.splat83 = shufflevector %broadcast.splatinsert82, poison, zeroinitializer + %x = add i32 %srcALen.addr.0, %srcBLen.addr.0 + %broadcast.splat84 = insertelement %broadcast.splat83, i32 %srcBLen.addr.0, i64 1 + ret %broadcast.splat84 +} + From 244331ae833aaf33503bbd36890e704afb66a237 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Nov 2022 12:22:05 +0000 Subject: [PATCH 349/516] [X86] Fix scalar load latencies for WriteLoad scheduler class Atom was missing a load latency value (so was defaulting to 1cy) Znver1/Znver2 were using vector load latency values (which is what WriteFLoad*/WriteVecLoad* are for) instead of the scalar load latency value TBH I'm not sure clflush/clzero/prefetch ops should be tagged as WriteLoad but at least this makes us more consistent --- llvm/lib/Target/X86/X86ScheduleAtom.td | 2 +- llvm/lib/Target/X86/X86ScheduleZnver1.td | 2 +- llvm/lib/Target/X86/X86ScheduleZnver2.td | 2 +- llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s | 4 ++-- .../test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s | 2 +- 
llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s | 4 ++-- llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s | 4 ++-- .../test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s | 4 ++-- llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s | 4 ++-- 20 files changed, 40 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 09a09185b3bcd..85e08b93a4a6c 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -168,7 +168,7 @@ defm : X86WriteRes; // Loads, stores, and moves, not folded with other operations. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes; +def : WriteRes { let Latency = 3; } def : WriteRes; def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 78b32953cdf80..d6ea83b52257d 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -177,7 +177,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 4; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. 
// Does not cost anything by itself, only has latency, matching that of the WriteLoad, diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index d6b0d2dd191a1..9ebedb76b9e37 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -176,7 +176,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 4; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. // Does not cost anything by itself, only has latency, matching that of the WriteLoad, diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s index 25011cd1aea89..9a60718100851 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 3 3.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 4 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 1 1.00 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 1 1.00 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 1 1.00 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 1 1.00 * * prefetchnta (%rax) +# CHECK-NEXT: 1 3 1.00 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 3 1.00 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 3 1.00 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 3 1.00 * * prefetchnta (%rax) # CHECK-NEXT: 1 4 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 4 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s index 82d40dca00f7c..65d785038f32c 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2 
# CHECK-NEXT: 1 1 1.00 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 * * U clflush (%rax) +# CHECK-NEXT: 1 3 1.00 * * U clflush (%rax) # CHECK-NEXT: 3 6 5.00 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 4 7 6.00 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s index 07e8531e30e45..ead0c4268759e 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 2 1.00 * U fisttpl (%ecx) # CHECK-NEXT: 1 2 1.00 * U fisttpll (%eax) # CHECK-NEXT: 1 1 1.00 U fld %st(0) -# CHECK-NEXT: 1 1 1.00 * U flds (%edx) -# CHECK-NEXT: 1 1 1.00 * U fldl (%ecx) +# CHECK-NEXT: 1 3 1.00 * U flds (%edx) +# CHECK-NEXT: 1 3 1.00 * U fldl (%ecx) # CHECK-NEXT: 1 4 2.00 * U fldt (%eax) # CHECK-NEXT: 1 5 2.50 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.50 * U fldenv (%eax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s index 733aec155ec49..4c16bafb6377d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 * * U clflushopt (%rax) +# CHECK-NEXT: 1 4 0.50 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s index 420942130645b..70502433eefc7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 U clzero +# CHECK-NEXT: 1 4 0.50 U 
clzero # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s index 47a52fb06385a..7f5ec3104f09d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 * * prefetch (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetchw (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetch (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s index 5616e648f4314..3bf248b044b85 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 8 0.50 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetchnta (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s index 1db51b7b65147..c6bfe9a12137b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 
0.50 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 0.50 * * U clflush (%rax) +# CHECK-NEXT: 1 4 0.50 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s index b190803318a10..7eadac52bfb3a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s @@ -1400,16 +1400,16 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 5 0.50 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 8 0.50 * movsbl (%rax), %edi -# CHECK-NEXT: 1 8 0.50 * movzbl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movsbl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi # CHECK-NEXT: 2 5 0.50 * movsbq (%rax), %rdi # CHECK-NEXT: 2 5 0.50 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 8 0.50 * movswl (%rax), %edi -# CHECK-NEXT: 1 8 0.50 * movzwl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movswl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi # CHECK-NEXT: 2 5 0.50 * movswq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s index 0c26a40849d62..6cfa018e6dbca 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 8 0.50 * U flds 
(%edx) -# CHECK-NEXT: 1 8 0.50 * U fldl (%ecx) +# CHECK-NEXT: 1 4 0.50 * U flds (%edx) +# CHECK-NEXT: 1 4 0.50 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s index 461c0109254e1..671381f78a953 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 * * U clflushopt (%rax) +# CHECK-NEXT: 1 4 0.33 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s index 83de8d3a691d6..12c4f757551b1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 U clzero +# CHECK-NEXT: 1 4 0.33 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s index 2e240b3af8e83..b405f4c29e0f0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 * * prefetch (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetchw (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetch (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git 
a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s index 030b6521d628e..64c3ae95ba0d4 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 8 0.33 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetchnta (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s index a0535cfa0d02d..9a465802f8b17 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.33 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 0.33 * * U clflush (%rax) +# CHECK-NEXT: 1 4 0.33 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s index 7ccf6efb43e36..3cd41c176ce04 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s @@ -1202,16 +1202,16 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 5 0.33 * movzbw (%rax), %di 
# CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 8 0.33 * movsbl (%rax), %edi -# CHECK-NEXT: 1 8 0.33 * movzbl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movsbl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi # CHECK-NEXT: 2 5 0.33 * movsbq (%rax), %rdi # CHECK-NEXT: 2 5 0.33 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 8 0.33 * movswl (%rax), %edi -# CHECK-NEXT: 1 8 0.33 * movzwl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movswl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi # CHECK-NEXT: 2 5 0.33 * movswq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s index be542ecb2debc..1987176040002 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 8 0.33 * U flds (%edx) -# CHECK-NEXT: 1 8 0.33 * U fldl (%ecx) +# CHECK-NEXT: 1 4 0.33 * U flds (%edx) +# CHECK-NEXT: 1 4 0.33 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) From 6fff3babb426c7274268a9b4a93203323882444e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Nov 2022 13:16:23 +0000 Subject: [PATCH 350/516] Revert rG244331ae833aaf33503bbd36890e704afb66a237 "[X86] Fix scalar load latencies for WriteLoad scheduler class" Forgot to update tests outside the llvm-mca test folder :-( --- llvm/lib/Target/X86/X86ScheduleAtom.td | 2 +- llvm/lib/Target/X86/X86ScheduleZnver1.td | 2 +- 
llvm/lib/Target/X86/X86ScheduleZnver2.td | 2 +- llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s | 4 ++-- .../test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s | 4 ++-- llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s | 4 ++-- .../test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s | 4 ++-- llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s | 4 ++-- 20 files changed, 40 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 85e08b93a4a6c..09a09185b3bcd 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -168,7 +168,7 @@ defm : X86WriteRes; // Loads, stores, and moves, not folded with other operations. 
//////////////////////////////////////////////////////////////////////////////// -def : WriteRes { let Latency = 3; } +def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index d6ea83b52257d..78b32953cdf80 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -177,7 +177,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 8; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. // Does not cost anything by itself, only has latency, matching that of the WriteLoad, diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 9ebedb76b9e37..d6b0d2dd191a1 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -176,7 +176,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 8; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. 
// Does not cost anything by itself, only has latency, matching that of the WriteLoad, diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s index 9a60718100851..25011cd1aea89 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 3 3.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 4 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 3 1.00 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 3 1.00 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 3 1.00 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 3 1.00 * * prefetchnta (%rax) +# CHECK-NEXT: 1 1 1.00 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 1 1.00 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 1 1.00 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 1 1.00 * * prefetchnta (%rax) # CHECK-NEXT: 1 4 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 4 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s index 65d785038f32c..82d40dca00f7c 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 * * U clflush (%rax) +# CHECK-NEXT: 1 1 1.00 * * U clflush (%rax) # CHECK-NEXT: 3 6 5.00 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 4 7 6.00 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s index ead0c4268759e..07e8531e30e45 100644 --- a/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Atom/resources-x87.s @@ -276,8 +276,8 @@ 
fyl2xp1 # CHECK-NEXT: 1 2 1.00 * U fisttpl (%ecx) # CHECK-NEXT: 1 2 1.00 * U fisttpll (%eax) # CHECK-NEXT: 1 1 1.00 U fld %st(0) -# CHECK-NEXT: 1 3 1.00 * U flds (%edx) -# CHECK-NEXT: 1 3 1.00 * U fldl (%ecx) +# CHECK-NEXT: 1 1 1.00 * U flds (%edx) +# CHECK-NEXT: 1 1 1.00 * U fldl (%ecx) # CHECK-NEXT: 1 4 2.00 * U fldt (%eax) # CHECK-NEXT: 1 5 2.50 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.50 * U fldenv (%eax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s index 4c16bafb6377d..733aec155ec49 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.50 * * U clflushopt (%rax) +# CHECK-NEXT: 1 8 0.50 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s index 70502433eefc7..420942130645b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.50 U clzero +# CHECK-NEXT: 1 8 0.50 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s index 7f5ec3104f09d..47a52fb06385a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.50 * * prefetch (%rax) -# CHECK-NEXT: 1 4 0.50 * * prefetchw 
(%rax) +# CHECK-NEXT: 1 8 0.50 * * prefetch (%rax) +# CHECK-NEXT: 1 8 0.50 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s index 3bf248b044b85..5616e648f4314 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 4 0.50 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 4 0.50 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 4 0.50 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 4 0.50 * * prefetchnta (%rax) +# CHECK-NEXT: 1 8 0.50 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 8 0.50 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 8 0.50 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 8 0.50 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s index c6bfe9a12137b..1db51b7b65147 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.50 * * U clflush (%rax) +# CHECK-NEXT: 1 8 0.50 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s index 7eadac52bfb3a..b190803318a10 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s @@ -1400,16 +1400,16 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 5 0.50 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 4 0.50 * movsbl (%rax), %edi -# CHECK-NEXT: 1 4 0.50 * movzbl (%rax), %edi +# CHECK-NEXT: 1 8 0.50 * movsbl (%rax), %edi +# CHECK-NEXT: 1 8 0.50 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi # CHECK-NEXT: 2 5 0.50 * movsbq (%rax), %rdi # CHECK-NEXT: 2 5 0.50 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 4 0.50 * movswl (%rax), %edi -# CHECK-NEXT: 1 4 0.50 * movzwl (%rax), %edi +# CHECK-NEXT: 1 8 0.50 * movswl (%rax), %edi +# CHECK-NEXT: 1 8 0.50 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi # CHECK-NEXT: 2 5 0.50 * movswq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s index 6cfa018e6dbca..0c26a40849d62 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 4 0.50 * U flds (%edx) -# CHECK-NEXT: 1 4 0.50 * U fldl (%ecx) +# CHECK-NEXT: 1 8 0.50 * U flds (%edx) +# CHECK-NEXT: 1 8 0.50 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s index 671381f78a953..461c0109254e1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 * * U clflushopt (%rax) +# CHECK-NEXT: 1 8 0.33 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s index 12c4f757551b1..83de8d3a691d6 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 U clzero +# CHECK-NEXT: 1 8 0.33 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s index b405f4c29e0f0..2e240b3af8e83 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 * * prefetch (%rax) -# CHECK-NEXT: 1 4 0.33 * * prefetchw (%rax) +# CHECK-NEXT: 1 8 0.33 * * prefetch (%rax) +# CHECK-NEXT: 1 8 0.33 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s index 64c3ae95ba0d4..030b6521d628e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 * * prefetcht0 (%rax) -# 
CHECK-NEXT: 1 4 0.33 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 4 0.33 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 4 0.33 * * prefetchnta (%rax) +# CHECK-NEXT: 1 8 0.33 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 8 0.33 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 8 0.33 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 8 0.33 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s index 9a465802f8b17..a0535cfa0d02d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.33 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 * * U clflush (%rax) +# CHECK-NEXT: 1 8 0.33 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s index 3cd41c176ce04..7ccf6efb43e36 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s @@ -1202,16 +1202,16 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 5 0.33 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 4 0.33 * movsbl (%rax), %edi -# CHECK-NEXT: 1 4 0.33 * movzbl (%rax), %edi +# CHECK-NEXT: 1 8 0.33 * movsbl (%rax), %edi +# CHECK-NEXT: 1 8 0.33 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi # CHECK-NEXT: 2 5 0.33 * movsbq (%rax), %rdi # CHECK-NEXT: 2 5 0.33 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 
0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 4 0.33 * movswl (%rax), %edi -# CHECK-NEXT: 1 4 0.33 * movzwl (%rax), %edi +# CHECK-NEXT: 1 8 0.33 * movswl (%rax), %edi +# CHECK-NEXT: 1 8 0.33 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi # CHECK-NEXT: 2 5 0.33 * movswq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s index 1987176040002..be542ecb2debc 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 4 0.33 * U flds (%edx) -# CHECK-NEXT: 1 4 0.33 * U fldl (%ecx) +# CHECK-NEXT: 1 8 0.33 * U flds (%edx) +# CHECK-NEXT: 1 8 0.33 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) From d84cdf5d285bdd0989c595a82ff609cfab09f8bc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Nov 2022 13:35:46 +0000 Subject: [PATCH 351/516] [X86] Regenerate atom-pad-short-functions.ll test checks --- .../CodeGen/X86/atom-pad-short-functions.ll | 122 +++++++++++------- 1 file changed, 73 insertions(+), 49 deletions(-) diff --git a/llvm/test/CodeGen/X86/atom-pad-short-functions.ll b/llvm/test/CodeGen/X86/atom-pad-short-functions.ll index c1bf727a176cc..c107dfff28ca3 100644 --- a/llvm/test/CodeGen/X86/atom-pad-short-functions.ll +++ b/llvm/test/CodeGen/X86/atom-pad-short-functions.ll @@ -1,64 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O1 -mcpu=atom -mtriple=i686-linux | FileCheck %s declare void @external_function(...) 
define i32 @test_return_val(i32 %a) nounwind { -; CHECK: test_return_val -; CHECK: movl -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret +; CHECK-LABEL: test_return_val: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl ret i32 %a } define i32 @test_optsize(i32 %a) nounwind optsize { -; CHECK: test_optsize -; CHECK: movl -; CHECK-NEXT: ret +; CHECK-LABEL: test_optsize: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl ret i32 %a } define i32 @test_minsize(i32 %a) nounwind minsize { -; CHECK: test_minsize -; CHECK: movl -; CHECK-NEXT: ret +; CHECK-LABEL: test_minsize: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl ret i32 %a } define i32 @test_pgso(i32 %a) nounwind !prof !14 { -; CHECK: test_pgso -; CHECK: movl -; CHECK-NEXT: ret +; CHECK-LABEL: test_pgso: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl ret i32 %a } define i32 @test_add(i32 %a, i32 %b) nounwind { -; CHECK: test_add -; CHECK: addl -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret +; CHECK-LABEL: test_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl %result = add i32 %a, %b ret i32 %result } define i32 @test_multiple_ret(i32 %a, i32 %b, i1 %c) nounwind { -; CHECK: @test_multiple_ret -; CHECK: je +; CHECK-LABEL: test_multiple_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB5_2 +; CHECK-NEXT: # %bb.1: # %bb1 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB5_2: # %bb2 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop 
+; CHECK-NEXT: nop +; CHECK-NEXT: retl -; CHECK: nop -; CHECK: nop -; CHECK: ret -; CHECK: nop -; CHECK: nop -; CHECK: ret br i1 %c, label %bb1, label %bb2 @@ -69,41 +82,52 @@ bb2: ret i32 %b } -define void @test_call_others(i32 %x) nounwind -{ -; CHECK: test_call_others -; CHECK: je +define void @test_call_others(i32 %x) nounwind { +; CHECK-LABEL: test_call_others: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB6_1 +; CHECK-NEXT: # %bb.2: # %true.case +; CHECK-NEXT: jmp external_function@PLT # TAILCALL +; CHECK-NEXT: .LBB6_1: # %if.end +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl %tobool = icmp eq i32 %x, 0 br i1 %tobool, label %if.end, label %true.case -; CHECK: jmp external_function true.case: tail call void @external_function() nounwind br label %if.end -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret if.end: ret void } define void @test_branch_to_same_bb(i32 %x, i32 %y) nounwind { -; CHECK: @test_branch_to_same_bb +; CHECK-LABEL: test_branch_to_same_bb: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jle .LBB7_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_1: # %while.cond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB7_1 +; CHECK-NEXT: .LBB7_2: # %while.end +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl %cmp = icmp sgt i32 %x, 0 br i1 %cmp, label %while.cond, label %while.end while.cond: br label %while.cond -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret while.end: ret void } From 08fe55b346cbb3a5126757ca5995ed22771d0326 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Nov 2022 14:03:59 +0000 Subject: [PATCH 352/516] [X86] Fix scalar load latencies for WriteLoad scheduler class Znver1/Znver2 were using vector load latency values (which is what WriteFLoad*/WriteVecLoad* are for) instead of the scalar load latency 
value TBH I'm not sure clflush/clzero/prefetch ops should be tagged as WriteLoad but at least this makes us more consistent --- llvm/lib/Target/X86/X86ScheduleZnver1.td | 2 +- llvm/lib/Target/X86/X86ScheduleZnver2.td | 2 +- .../test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s | 4 ++-- llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s | 4 ++-- .../test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s | 4 ++-- llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s | 2 +- llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s | 8 ++++---- llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s | 4 ++-- 16 files changed, 32 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 78b32953cdf80..d6ea83b52257d 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -177,7 +177,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 4; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. 
// Does not cost anything by itself, only has latency, matching that of the WriteLoad, diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index d6b0d2dd191a1..9ebedb76b9e37 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -176,7 +176,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 4; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. // Does not cost anything by itself, only has latency, matching that of the WriteLoad, diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s index 733aec155ec49..4c16bafb6377d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 * * U clflushopt (%rax) +# CHECK-NEXT: 1 4 0.50 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s index 420942130645b..70502433eefc7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 U clzero +# CHECK-NEXT: 1 4 0.50 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s index 47a52fb06385a..7f5ec3104f09d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 * * prefetch (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetchw (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetch (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s index 5616e648f4314..3bf248b044b85 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 8 0.50 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetchnta (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s index 1db51b7b65147..c6bfe9a12137b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 0.50 * * U clflush (%rax) +# CHECK-NEXT: 1 4 0.50 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # 
CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s index b190803318a10..7eadac52bfb3a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s @@ -1400,16 +1400,16 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 5 0.50 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 8 0.50 * movsbl (%rax), %edi -# CHECK-NEXT: 1 8 0.50 * movzbl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movsbl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi # CHECK-NEXT: 2 5 0.50 * movsbq (%rax), %rdi # CHECK-NEXT: 2 5 0.50 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 8 0.50 * movswl (%rax), %edi -# CHECK-NEXT: 1 8 0.50 * movzwl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movswl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi # CHECK-NEXT: 2 5 0.50 * movswq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s index 0c26a40849d62..6cfa018e6dbca 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 8 0.50 * U flds (%edx) -# CHECK-NEXT: 1 8 0.50 * U fldl (%ecx) +# CHECK-NEXT: 1 4 0.50 * U flds (%edx) +# CHECK-NEXT: 1 4 0.50 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) diff --git 
a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s index 461c0109254e1..671381f78a953 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 * * U clflushopt (%rax) +# CHECK-NEXT: 1 4 0.33 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s index 83de8d3a691d6..12c4f757551b1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 U clzero +# CHECK-NEXT: 1 4 0.33 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s index 2e240b3af8e83..b405f4c29e0f0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 * * prefetch (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetchw (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetch (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s index 030b6521d628e..64c3ae95ba0d4 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s 
@@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 8 0.33 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetchnta (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s index a0535cfa0d02d..9a465802f8b17 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.33 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 0.33 * * U clflush (%rax) +# CHECK-NEXT: 1 4 0.33 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s index 7ccf6efb43e36..3cd41c176ce04 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s @@ -1202,16 +1202,16 @@ xorq (%rax), %rdi # CHECK-NEXT: 2 5 0.33 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 8 0.33 * movsbl (%rax), %edi -# CHECK-NEXT: 1 8 0.33 * movzbl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movsbl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movzbl (%rax), %edi # 
CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi # CHECK-NEXT: 2 5 0.33 * movsbq (%rax), %rdi # CHECK-NEXT: 2 5 0.33 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 8 0.33 * movswl (%rax), %edi -# CHECK-NEXT: 1 8 0.33 * movzwl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movswl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi # CHECK-NEXT: 2 5 0.33 * movswq (%rax), %rdi diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s index be542ecb2debc..1987176040002 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 8 0.33 * U flds (%edx) -# CHECK-NEXT: 1 8 0.33 * U fldl (%ecx) +# CHECK-NEXT: 1 4 0.33 * U flds (%edx) +# CHECK-NEXT: 1 4 0.33 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) From 92c5bcb7bce77ba2b9612169cab12fa52737e15a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 4 Nov 2022 11:17:44 -0400 Subject: [PATCH 353/516] [InstCombine] add tests for zext of and of trunc; NFC The basic one-use version of this sequence is reduced, but we don't transform these currently. 
--- llvm/test/Transforms/InstCombine/zext.ll | 101 ++++++++++++++++++++++- 1 file changed, 98 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll index 765ae1a1b64a5..06188de5751ed 100644 --- a/llvm/test/Transforms/InstCombine/zext.ll +++ b/llvm/test/Transforms/InstCombine/zext.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instcombine -S | FileCheck %s +declare void @use1(i1) +declare void @use32(i32) +declare void @use_vec(<2 x i9>) + define i64 @test_sext_zext(i16 %A) { ; CHECK-LABEL: @test_sext_zext( ; CHECK-NEXT: [[C2:%.*]] = zext i16 [[A:%.*]] to i64 @@ -172,9 +176,6 @@ define i47 @sext_zext_apint2(i11 %A) { ret i47 %c2 } -declare void @use1(i1) -declare void @use32(i32) - define i32 @masked_bit_set(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_set( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] @@ -535,3 +536,97 @@ join: %conv4 = zext i1 %x1 to i16 ret i16 %conv4 } + +define i64 @and_trunc_extra_use1(i64 %x, i32 %y) { +; CHECK-LABEL: @and_trunc_extra_use1( +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: call void @use32(i32 [[T]]) +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], [[Y:%.*]] +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i64 %x to i32 + call void @use32(i32 %t) + %a = and i32 %t, %y + %z = zext i32 %a to i64 + ret i64 %z +} + +define i64 @and_trunc_extra_use1_commute(i64 %x, i32 %p) { +; CHECK-LABEL: @and_trunc_extra_use1_commute( +; CHECK-NEXT: [[Y:%.*]] = mul i32 [[P:%.*]], [[P]] +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: call void @use32(i32 [[T]]) +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], [[T]] +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %y = mul i32 %p, %p ; thwart complexity-based canonicalization + %t = trunc i64 %x to i32 + call void @use32(i32 %t) + %a = and 
i32 %y, %t + %z = zext i32 %a to i64 + ret i64 %z +} + +define i64 @and_trunc_extra_use2(i64 %x, i32 %y) { +; CHECK-LABEL: @and_trunc_extra_use2( +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], [[Y:%.*]] +; CHECK-NEXT: call void @use32(i32 [[A]]) +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i64 %x to i32 + %a = and i32 %t, %y + call void @use32(i32 %a) + %z = zext i32 %a to i64 + ret i64 %z +} + +define i64 @and_trunc_extra_use2_constant(i64 %x) { +; CHECK-LABEL: @and_trunc_extra_use2_constant( +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], 42 +; CHECK-NEXT: call void @use32(i32 [[A]]) +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i64 %x to i32 + %a = and i32 %t, 42 + call void @use32(i32 %a) + %z = zext i32 %a to i64 + ret i64 %z +} + +define <2 x i17> @and_trunc_extra_use3_constant_vec(<2 x i17> %x) { +; CHECK-LABEL: @and_trunc_extra_use3_constant_vec( +; CHECK-NEXT: [[T:%.*]] = trunc <2 x i17> [[X:%.*]] to <2 x i9> +; CHECK-NEXT: call void @use_vec(<2 x i9> [[T]]) +; CHECK-NEXT: [[A:%.*]] = and <2 x i9> [[T]], +; CHECK-NEXT: call void @use_vec(<2 x i9> [[A]]) +; CHECK-NEXT: [[Z:%.*]] = zext <2 x i9> [[A]] to <2 x i17> +; CHECK-NEXT: ret <2 x i17> [[Z]] +; + %t = trunc <2 x i17> %x to <2 x i9> + call void @use_vec(<2 x i9> %t) + %a = and <2 x i9> %t, + call void @use_vec(<2 x i9> %a) + %z = zext <2 x i9> %a to <2 x i17> + ret <2 x i17> %z +} + +define i64 @and_trunc_extra_use1_wider_src(i65 %x, i32 %y) { +; CHECK-LABEL: @and_trunc_extra_use1_wider_src( +; CHECK-NEXT: [[T:%.*]] = trunc i65 [[X:%.*]] to i32 +; CHECK-NEXT: call void @use32(i32 [[T]]) +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], [[Y:%.*]] +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i65 %x to i32 + call void @use32(i32 %t) + %a = and i32 %t, %y + %z = zext i32 %a 
to i64 + ret i64 %z +} From 1c6ebe29d3c03382a62985fab764f5641db7f875 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Nov 2022 08:26:39 -0500 Subject: [PATCH 354/516] [InstCombine] reduce multi-use casts+masks As noted in the code comment, we could generalize this: https://alive2.llvm.org/ce/z/N5m-eZ It saves an instruction even without a constant operand, but the 'and' is wider. We can do that as another step if it doesn't harm anything. I noticed that this missing pattern with a constant operand inhibited other transforms in a recent bug report, so this is enough to solve that case. --- .../Transforms/InstCombine/InstCombineCasts.cpp | 11 +++++++++++ llvm/test/Transforms/InstCombine/zext.ll | 16 ++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index c91786148beb4..dfe49fb525fc1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1328,6 +1328,17 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC); } + // If we are truncating, masking, and then zexting back to the original type, + // that's just a mask. This is not handled by canEvaluateZextd if the + // intermediate values have extra uses. This could be generalized further for + // a non-constant mask operand. 
+ // zext (and (trunc X), C) --> and X, (zext C) + if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) && + X->getType() == DestTy) { + Constant *ZextC = ConstantExpr::getZExt(C, DestTy); + return BinaryOperator::CreateAnd(X, ZextC); + } + if (match(Src, m_VScale(DL))) { if (CI.getFunction() && CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) { diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll index 06188de5751ed..25f59d5570ee2 100644 --- a/llvm/test/Transforms/InstCombine/zext.ll +++ b/llvm/test/Transforms/InstCombine/zext.ll @@ -537,6 +537,8 @@ join: ret i16 %conv4 } +; negative test - but this could be transformed to eliminate a use of 't' + define i64 @and_trunc_extra_use1(i64 %x, i32 %y) { ; CHECK-LABEL: @and_trunc_extra_use1( ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 @@ -552,6 +554,8 @@ define i64 @and_trunc_extra_use1(i64 %x, i32 %y) { ret i64 %z } +; negative test - but this could be transformed to eliminate a use of 't' + define i64 @and_trunc_extra_use1_commute(i64 %x, i32 %p) { ; CHECK-LABEL: @and_trunc_extra_use1_commute( ; CHECK-NEXT: [[Y:%.*]] = mul i32 [[P:%.*]], [[P]] @@ -569,6 +573,8 @@ define i64 @and_trunc_extra_use1_commute(i64 %x, i32 %p) { ret i64 %z } +; negative test - avoid creating an extra instruction + define i64 @and_trunc_extra_use2(i64 %x, i32 %y) { ; CHECK-LABEL: @and_trunc_extra_use2( ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 @@ -584,12 +590,14 @@ define i64 @and_trunc_extra_use2(i64 %x, i32 %y) { ret i64 %z } +; With constant mask, we duplicate it as a wider constant. 
+ define i64 @and_trunc_extra_use2_constant(i64 %x) { ; CHECK-LABEL: @and_trunc_extra_use2_constant( ; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 ; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], 42 ; CHECK-NEXT: call void @use32(i32 [[A]]) -; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: [[Z:%.*]] = and i64 [[X]], 42 ; CHECK-NEXT: ret i64 [[Z]] ; %t = trunc i64 %x to i32 @@ -599,13 +607,15 @@ define i64 @and_trunc_extra_use2_constant(i64 %x) { ret i64 %z } +; Works with arbitrary vectors and verify that the constant is zext. + define <2 x i17> @and_trunc_extra_use3_constant_vec(<2 x i17> %x) { ; CHECK-LABEL: @and_trunc_extra_use3_constant_vec( ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i17> [[X:%.*]] to <2 x i9> ; CHECK-NEXT: call void @use_vec(<2 x i9> [[T]]) ; CHECK-NEXT: [[A:%.*]] = and <2 x i9> [[T]], ; CHECK-NEXT: call void @use_vec(<2 x i9> [[A]]) -; CHECK-NEXT: [[Z:%.*]] = zext <2 x i9> [[A]] to <2 x i17> +; CHECK-NEXT: [[Z:%.*]] = and <2 x i17> [[X]], ; CHECK-NEXT: ret <2 x i17> [[Z]] ; %t = trunc <2 x i17> %x to <2 x i9> @@ -616,6 +626,8 @@ define <2 x i17> @and_trunc_extra_use3_constant_vec(<2 x i17> %x) { ret <2 x i17> %z } +; negative test - would require another cast + define i64 @and_trunc_extra_use1_wider_src(i65 %x, i32 %y) { ; CHECK-LABEL: @and_trunc_extra_use1_wider_src( ; CHECK-NEXT: [[T:%.*]] = trunc i65 [[X:%.*]] to i32 From bff6880a5f890b8d01d9de26d3482d3a145157c0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Nov 2022 09:00:04 -0500 Subject: [PATCH 355/516] [SimplifyLibCalls] improve code readability for AttributeList propagation; NFC It is possible that we can do better on some of these transforms by passing some subset of attributes, but we were not doing that in any of the changed code. So it's better to give that a name to indicate we're clearing attributes or make that more obvious by using the default-constructed empty list. 
--- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 7922f785c338a..6dcf5a3c68136 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1856,7 +1856,6 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) { Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { Module *M = Pow->getModule(); Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); bool Ignored; @@ -1881,8 +1880,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { LibFunc LibFn; Function *CalleeFn = BaseFn->getCalledFunction(); - if (CalleeFn && - TLI->getLibFunc(CalleeFn->getName(), LibFn) && + if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && isLibFuncEmittable(M, TLI, LibFn)) { StringRef ExpName; Intrinsic::ID ID; @@ -1892,14 +1890,18 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { switch (LibFn) { default: return nullptr; - case LibFunc_expf: case LibFunc_exp: case LibFunc_expl: + case LibFunc_expf: + case LibFunc_exp: + case LibFunc_expl: ExpName = TLI->getName(LibFunc_exp); ID = Intrinsic::exp; LibFnFloat = LibFunc_expf; LibFnDouble = LibFunc_exp; LibFnLongDouble = LibFunc_expl; break; - case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l: + case LibFunc_exp2f: + case LibFunc_exp2: + case LibFunc_exp2l: ExpName = TLI->getName(LibFunc_exp2); ID = Intrinsic::exp2; LibFnFloat = LibFunc_exp2f; @@ -1932,6 +1934,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { if (!match(Pow->getArgOperand(0), m_APFloat(BaseF))) return nullptr; + AttributeList 
NoAttrs; // Attributes are only meaningful on the original call + // pow(2.0, itofp(x)) -> ldexp(1.0, x) if (match(Base, m_SpecificFP(2.0)) && (isa(Expo) || isa(Expo)) && @@ -1940,7 +1944,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { return copyFlags(*Pow, emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI, LibFunc_ldexp, LibFunc_ldexpf, - LibFunc_ldexpl, B, Attrs)); + LibFunc_ldexpl, B, NoAttrs)); } // pow(2.0 ** n, x) -> exp2(n * x) @@ -1964,7 +1968,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { else return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs)); + LibFunc_exp2l, B, NoAttrs)); } } @@ -1974,7 +1978,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l, - B, Attrs)); + B, NoAttrs)); // pow(x, y) -> exp2(log2(x) * y) if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() && @@ -2000,7 +2004,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { LibFunc_exp2l)) return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs)); + LibFunc_exp2l, B, NoAttrs)); } } @@ -2032,7 +2036,6 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, /// Use square root in place of pow(x, +/-0.5). 
Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); @@ -2054,7 +2057,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { !isKnownNeverInfinity(Base, TLI)) return nullptr; - Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI); + Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B, + TLI); if (!Sqrt) return nullptr; @@ -2205,7 +2209,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { Module *M = CI->getModule(); Function *Callee = CI->getCalledFunction(); - AttributeList Attrs; // Attributes are only meaningful on the original call StringRef Name = Callee->getName(); Value *Ret = nullptr; if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && @@ -2215,14 +2218,14 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { Type *Ty = CI->getType(); Value *Op = CI->getArgOperand(0); - // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize - // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize + // exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize + // exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize if ((isa(Op) || isa(Op)) && hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, - LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, - B, Attrs); + LibFunc_ldexp, LibFunc_ldexpf, + LibFunc_ldexpl, B, AttributeList()); } return Ret; @@ -2260,7 +2263,6 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) { Value 
*LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Function *LogFn = Log->getCalledFunction(); - AttributeList Attrs; // Attributes are only meaningful on the original call StringRef LogNm = LogFn->getName(); Intrinsic::ID LogID = LogFn->getIntrinsicID(); Module *Mod = Log->getModule(); @@ -2371,12 +2373,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { TLI->getLibFunc(*Arg, ArgLb); // log(pow(x,y)) -> y*log(x) + AttributeList NoAttrs; if (ArgLb == PowLb || ArgID == Intrinsic::pow) { Value *LogX = Log->doesNotAccessMemory() ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), Arg->getOperand(0), "log") - : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, Attrs); + : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs); Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul"); // Since pow() may have side effects, e.g. errno, // dead code elimination may not be trusted to remove it. @@ -2399,7 +2402,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Value *LogE = Log->doesNotAccessMemory() ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), Eul, "log") - : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, Attrs); + : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, NoAttrs); Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul"); // Since exp() may have side effects, e.g. errno, // dead code elimination may not be trusted to remove it. From 4e56aa252fc983574e32a0cb8b73333831f66700 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 6 Nov 2022 14:32:00 +0000 Subject: [PATCH 356/516] [X86] Schedule scalar movsx/movzx load+extend ops as WriteLoad instead of WriteALULd Although some very old x86 hardware would perform the extension as a later stage, every target we have a scheduler for always performs this as part of the load-op (avoid ALU pipes etc.). If anyone wants to model very old hardware they can always override this. 
This patch just tags these as WriteLoad directly and removes unnecessary overrides - this cleans up some latency/throughput tests as they aren't being badly modelled as folded ALU ops --- llvm/lib/Target/X86/X86InstrExtension.td | 34 +++++++------- llvm/lib/Target/X86/X86SchedBroadwell.td | 2 - llvm/lib/Target/X86/X86SchedHaswell.td | 4 +- llvm/lib/Target/X86/X86SchedIceLake.td | 4 +- llvm/lib/Target/X86/X86SchedSandyBridge.td | 8 ---- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 8 ---- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 8 ---- llvm/lib/Target/X86/X86ScheduleZnver1.td | 4 -- llvm/lib/Target/X86/X86ScheduleZnver2.td | 4 -- .../llvm-mca/X86/BdVer2/resources-x86_64.s | 46 +++++++++---------- .../llvm-mca/X86/BtVer2/resources-x86_64.s | 46 +++++++++---------- .../tools/llvm-mca/X86/SLM/resources-x86_64.s | 46 +++++++++---------- .../llvm-mca/X86/Znver1/resources-x86_64.s | 30 ++++++------ .../llvm-mca/X86/Znver2/resources-x86_64.s | 30 ++++++------ .../llvm-mca/X86/Znver3/resources-x86_64.s | 20 ++++---- 15 files changed, 128 insertions(+), 166 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrExtension.td b/llvm/lib/Target/X86/X86InstrExtension.td index 7a4eb138ec346..8d3fce7f55bc6 100644 --- a/llvm/lib/Target/X86/X86InstrExtension.td +++ b/llvm/lib/Target/X86/X86InstrExtension.td @@ -42,7 +42,7 @@ def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, - TB, OpSize16, Sched<[WriteALULd]>; + TB, OpSize16, Sched<[WriteLoad]>; } // hasSideEffects = 0 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", @@ -51,7 +51,7 @@ def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB, - 
OpSize32, Sched<[WriteALULd]>; + OpSize32, Sched<[WriteLoad]>; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR16:$src))]>, TB, @@ -59,7 +59,7 @@ def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, - OpSize32, TB, Sched<[WriteALULd]>; + OpSize32, TB, Sched<[WriteLoad]>; let hasSideEffects = 0 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), @@ -68,7 +68,7 @@ def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, - TB, OpSize16, Sched<[WriteALULd]>; + TB, OpSize16, Sched<[WriteLoad]>; } // hasSideEffects = 0 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", @@ -77,7 +77,7 @@ def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB, - OpSize32, Sched<[WriteALULd]>; + OpSize32, Sched<[WriteLoad]>; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR16:$src))]>, TB, @@ -85,7 +85,7 @@ def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, - TB, OpSize32, Sched<[WriteALULd]>; + TB, OpSize32, Sched<[WriteLoad]>; // These instructions exist as a consequence of operand size prefix having // control of the destination size, but not the input size. 
Only support them @@ -100,10 +100,10 @@ def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), let mayLoad = 1 in { def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movs{ww|x}\t{$src, $dst|$dst, $src}", - []>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable; + []>, OpSize16, TB, Sched<[WriteLoad]>, NotMemoryFoldable; def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movz{ww|x}\t{$src, $dst|$dst, $src}", - []>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable; + []>, TB, OpSize16, Sched<[WriteLoad]>, NotMemoryFoldable; } // mayLoad = 1 } // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 @@ -119,7 +119,7 @@ let mayLoad = 1 in def MOVZX32rm8_NOREX : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB, OpSize32, Sched<[WriteALULd]>; + []>, TB, OpSize32, Sched<[WriteLoad]>; def MOVSX32rr8_NOREX : I<0xBE, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), @@ -129,7 +129,7 @@ let mayLoad = 1 in def MOVSX32rm8_NOREX : I<0xBE, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB, OpSize32, Sched<[WriteALULd]>; + []>, TB, OpSize32, Sched<[WriteLoad]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -143,7 +143,7 @@ def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, - TB, Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sext GR16:$src))]>, TB, @@ -151,7 +151,7 @@ def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, 
$src}", [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, - TB, Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sext GR32:$src))]>, @@ -159,7 +159,7 @@ def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i32 addr:$src))]>, - Sched<[WriteALULd]>, Requires<[In64BitMode]>; + Sched<[WriteLoad]>, Requires<[In64BitMode]>; // These instructions exist as a consequence of operand size prefix having // control of the destination size, but not the input size. Only support them @@ -174,10 +174,10 @@ def MOVSX32rr32: I<0x63, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), let mayLoad = 1 in { def MOVSX16rm32: I<0x63, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>, - Sched<[WriteALULd]>, OpSize16, Requires<[In64BitMode]>; + Sched<[WriteLoad]>, OpSize16, Requires<[In64BitMode]>; def MOVSX32rm32: I<0x63, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>, - Sched<[WriteALULd]>, OpSize32, Requires<[In64BitMode]>; + Sched<[WriteLoad]>, OpSize32, Requires<[In64BitMode]>; } // mayLoad = 1 } // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 @@ -189,14 +189,14 @@ def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), let mayLoad = 1 in def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, - TB, Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, - TB, 
Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; } // 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index d4ffdea79c5cf..01c84048c60ba 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -908,8 +908,6 @@ def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; def: InstRW<[BWWriteResGroup49], (instrs VBROADCASTSSrm, VMOVDDUPrm, MOVDDUPrm, VMOVSHDUPrm, MOVSHDUPrm, diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 13b0ed25361e9..44fc1acf6b742 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -886,9 +886,7 @@ def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)", - "(V?)MOVDDUPrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "(V?)MOVDDUPrm")>; def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { let Latency = 1; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index fe812a2d71ecf..43fb6eeacc256 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1071,9 +1071,7 @@ def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[ICXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)", - "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71? +def: InstRW<[ICXWriteResGroup58], (instregex "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71? 
def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> { let Latency = 5; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index b1dd52da3fa23..9d7069a277eb6 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -736,14 +736,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> { } def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>; -def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; - def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> { let Latency = 5; let NumMicroOps = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 59d7c61a3f08a..e92a5a87c7da6 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -928,14 +928,6 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; - def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 5; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index da1b47e98d774..5ee909b49d098 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1048,14 +1048,6 @@ def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def SKXWriteResGroup58 : 
SchedWriteRes<[SKXPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; - def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> { let Latency = 5; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index d6ea83b52257d..705100d85f361 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -523,10 +523,6 @@ def : SchedAlias; // r16,m. def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>; -// MOVSX, MOVZX. -// r,m. -def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; - // XCHG. // r,m. def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> { diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 9ebedb76b9e37..87a953cef33a7 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -522,10 +522,6 @@ def : SchedAlias; // r16,m. def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>; -// MOVSX, MOVZX. -// r,m. -def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; - // XCHG. // r,r. 
def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> { diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s index b60a873b59d4c..20828a798b297 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.50 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 1.00 movsbw %al, %di # CHECK-NEXT: 1 1 1.00 movzbw %al, %di -# CHECK-NEXT: 1 5 1.50 * movsbw (%rax), %di -# CHECK-NEXT: 1 5 1.50 * movzbw (%rax), %di +# CHECK-NEXT: 1 5 1.00 * movsbw (%rax), %di +# CHECK-NEXT: 1 5 1.00 * movzbw (%rax), %di # CHECK-NEXT: 1 1 1.00 movsbl %al, %edi # CHECK-NEXT: 1 1 1.00 movzbl %al, %edi -# CHECK-NEXT: 1 5 1.50 * movsbl (%rax), %edi -# CHECK-NEXT: 1 5 1.50 * movzbl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movsbl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 1.00 movsbq %al, %rdi # CHECK-NEXT: 1 1 1.00 movzbq %al, %rdi -# CHECK-NEXT: 1 5 1.50 * movsbq (%rax), %rdi -# CHECK-NEXT: 1 5 1.50 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 1.00 movswl %ax, %edi # CHECK-NEXT: 1 1 1.00 movzwl %ax, %edi -# CHECK-NEXT: 1 5 1.50 * movswl (%rax), %edi -# CHECK-NEXT: 1 5 1.50 * movzwl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movswl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 1.00 movswq %ax, %rdi # CHECK-NEXT: 1 1 1.00 movzwq %ax, %rdi -# CHECK-NEXT: 1 5 1.50 * movswq (%rax), %rdi -# CHECK-NEXT: 1 5 1.50 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movswq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 1.00 movslq %eax, %rdi -# CHECK-NEXT: 1 5 1.50 * movslq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movslq (%rax), %rdi # CHECK-NEXT: 1 4 4.00 mulb %dil # CHECK-NEXT: 1 8 4.00 * mulb (%rax) # CHECK-NEXT: 2 4 5.00 mulw %si @@ -1968,7 +1968,7 @@ 
xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] -# CHECK-NEXT: 769.50 769.50 - - 246.00 1815.50 2220.50 - - - - - - - - - - - - 616.50 616.50 136.00 306.00 +# CHECK-NEXT: 764.00 764.00 - - 246.00 1804.50 2209.50 - - - - - - - - - - - - 611.00 611.00 136.00 306.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: @@ -2338,26 +2338,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movsbw %al, %di # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzbw %al, %di -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movsbw (%rax), %di -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzbw (%rax), %di +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movsbw (%rax), %di +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzbw (%rax), %di # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movsbl %al, %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzbl %al, %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movsbl (%rax), %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzbl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movsbl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzbl (%rax), %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzbq %al, %rdi -# 
CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movsbq (%rax), %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzbq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movsbq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzbq (%rax), %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movswl %ax, %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzwl %ax, %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movswl (%rax), %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzwl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movswl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzwl (%rax), %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movswq %ax, %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movswq (%rax), %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzwq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movswq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzwq (%rax), %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movslq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movslq (%rax), %rdi # CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - 4.00 - mulb %dil # CHECK-NEXT: 1.50 1.50 - - - - 1.00 - - - - - - - - - - - - 1.50 1.50 4.00 - mulb (%rax) # CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - 5.00 - mulw %si 
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s index b0c89f017731d..6d750008119ad 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.50 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.50 movsbw %al, %di # CHECK-NEXT: 1 1 0.50 movzbw %al, %di -# CHECK-NEXT: 1 4 1.00 * movsbw (%rax), %di -# CHECK-NEXT: 1 4 1.00 * movzbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movsbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.50 movsbl %al, %edi # CHECK-NEXT: 1 1 0.50 movzbl %al, %edi -# CHECK-NEXT: 1 4 1.00 * movsbl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movsbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.50 movzbq %al, %rdi -# CHECK-NEXT: 1 4 1.00 * movsbq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movswl %ax, %edi # CHECK-NEXT: 1 1 0.50 movzwl %ax, %edi -# CHECK-NEXT: 1 4 1.00 * movswl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzwl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movswl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.50 movzwq %ax, %rdi -# CHECK-NEXT: 1 4 1.00 * movswq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movswq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movslq %eax, %rdi -# CHECK-NEXT: 1 4 1.00 * movslq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movslq (%rax), %rdi # CHECK-NEXT: 1 3 1.00 mulb %dil # CHECK-NEXT: 1 6 1.00 * mulb (%rax) # CHECK-NEXT: 3 3 3.00 mulw %si @@ -1959,7 +1959,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure 
per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 722.50 772.50 380.00 - - - - 992.00 80.00 893.00 - - - - +# CHECK-NEXT: 717.00 767.00 380.00 - - - - 992.00 80.00 893.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -2329,26 +2329,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsbw %al, %di # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzbw %al, %di -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movsbw (%rax), %di -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movsbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzbw (%rax), %di # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsbl %al, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzbl %al, %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movsbl (%rax), %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movsbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzbl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsbq %al, %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movsbq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movsbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzbq (%rax), %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movswl %ax, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzwl %ax, %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movswl (%rax), %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzwl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 
- - - - - - movswl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzwl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movswq %ax, %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movswq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzwq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movswq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzwq (%rax), %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movslq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movslq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mulb %dil # CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - mulb (%rax) # CHECK-NEXT: - 1.00 - - - - - - 3.00 - - - - - mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s index f48ac11746092..1491da0f17a83 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 1.00 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.50 movsbw %al, %di # CHECK-NEXT: 1 1 0.50 movzbw %al, %di -# CHECK-NEXT: 1 4 1.00 * movsbw (%rax), %di -# CHECK-NEXT: 1 4 1.00 * movzbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movsbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.50 movsbl %al, %edi # CHECK-NEXT: 1 1 0.50 movzbl %al, %edi -# CHECK-NEXT: 1 4 1.00 * movsbl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movsbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.50 movzbq %al, %rdi -# CHECK-NEXT: 1 4 1.00 * movsbq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movsbq (%rax), %rdi +# 
CHECK-NEXT: 1 3 1.00 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movswl %ax, %edi # CHECK-NEXT: 1 1 0.50 movzwl %ax, %edi -# CHECK-NEXT: 1 4 1.00 * movswl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzwl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movswl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.50 movzwq %ax, %rdi -# CHECK-NEXT: 1 4 1.00 * movswq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movswq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movslq %eax, %rdi -# CHECK-NEXT: 1 4 1.00 * movslq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movslq (%rax), %rdi # CHECK-NEXT: 3 5 5.00 mulb %dil # CHECK-NEXT: 3 8 5.00 * mulb (%rax) # CHECK-NEXT: 4 5 5.00 mulw %si @@ -1953,7 +1953,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: 400.00 - - 49.00 - 660.00 598.00 835.00 +# CHECK-NEXT: 400.00 - - 49.00 - 654.50 592.50 835.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -2323,26 +2323,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - 1.00 - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - - - - 0.50 0.50 - movsbw %al, %di # CHECK-NEXT: - - - - - 0.50 0.50 - movzbw %al, %di -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movsbw (%rax), %di -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 movsbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 movzbw (%rax), %di # CHECK-NEXT: - - - - - 0.50 0.50 - movsbl %al, %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movzbl %al, %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movsbl (%rax), %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 movsbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 movzbl (%rax), %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movsbq %al, %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - 
movzbq %al, %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movsbq (%rax), %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movsbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movzbq (%rax), %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - movswl %ax, %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movzwl %ax, %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movswl (%rax), %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzwl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 movswl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 movzwl (%rax), %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movswq %ax, %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - movzwq %ax, %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movswq (%rax), %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzwq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movswq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movzwq (%rax), %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - movslq %eax, %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movslq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movslq (%rax), %rdi # CHECK-NEXT: - - - - - - 5.00 - mulb %dil # CHECK-NEXT: - - - - - - 5.00 1.00 mulb (%rax) # CHECK-NEXT: - - - - - - 5.00 - mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s index 7eadac52bfb3a..90e7553f092a7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.25 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.25 movsbw %al, %di # CHECK-NEXT: 1 1 0.25 movzbw %al, %di -# CHECK-NEXT: 2 5 0.50 * movsbw (%rax), %di -# CHECK-NEXT: 2 5 0.50 * movzbw (%rax), %di +# CHECK-NEXT: 1 4 0.50 * movsbw (%rax), %di +# CHECK-NEXT: 1 4 0.50 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi # CHECK-NEXT: 1 4 0.50 * movsbl (%rax), %edi # CHECK-NEXT: 1 4 
0.50 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi -# CHECK-NEXT: 2 5 0.50 * movsbq (%rax), %rdi -# CHECK-NEXT: 2 5 0.50 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi # CHECK-NEXT: 1 4 0.50 * movswl (%rax), %edi # CHECK-NEXT: 1 4 0.50 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi -# CHECK-NEXT: 2 5 0.50 * movswq (%rax), %rdi -# CHECK-NEXT: 2 5 0.50 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movswq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movslq %eax, %rdi -# CHECK-NEXT: 2 5 0.50 * movslq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movslq (%rax), %rdi # CHECK-NEXT: 1 4 1.00 mulb %dil # CHECK-NEXT: 2 8 1.00 * mulb (%rax) # CHECK-NEXT: 1 3 1.00 mulw %si @@ -1957,7 +1957,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 233.00 233.00 230.50 264.50 246.50 230.50 392.00 - - - - 34.00 +# CHECK-NEXT: 233.00 233.00 228.75 262.75 244.75 228.75 392.00 - - - - 34.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2327,26 +2327,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movsbw %al, %di # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzbw %al, %di -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movsbw (%rax), %di -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movzbw (%rax), %di +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movsbw (%rax), %di +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzbw (%rax), %di # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movsbl %al, %edi # CHECK-NEXT: - - 0.25 0.25 0.25 
0.25 - - - - - - movzbl %al, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movsbl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzbl (%rax), %edi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movsbq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movzbq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movsbq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzbq (%rax), %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movswl %ax, %edi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzwl %ax, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movswl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzwl (%rax), %edi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movswq %ax, %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movswq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movzwq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movswq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzwq (%rax), %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movslq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movslq (%rax), %rdi # CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 mulb %dil # CHECK-NEXT: 0.50 0.50 - 1.00 - - - - - - - 1.00 mulb (%rax) # CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s index 3cd41c176ce04..2bef39cec5598 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s @@ -1198,26 +1198,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.25 U 
movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.25 movsbw %al, %di # CHECK-NEXT: 1 1 0.25 movzbw %al, %di -# CHECK-NEXT: 2 5 0.33 * movsbw (%rax), %di -# CHECK-NEXT: 2 5 0.33 * movzbw (%rax), %di +# CHECK-NEXT: 1 4 0.33 * movsbw (%rax), %di +# CHECK-NEXT: 1 4 0.33 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi # CHECK-NEXT: 1 4 0.33 * movsbl (%rax), %edi # CHECK-NEXT: 1 4 0.33 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi -# CHECK-NEXT: 2 5 0.33 * movsbq (%rax), %rdi -# CHECK-NEXT: 2 5 0.33 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi # CHECK-NEXT: 1 4 0.33 * movswl (%rax), %edi # CHECK-NEXT: 1 4 0.33 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi -# CHECK-NEXT: 2 5 0.33 * movswq (%rax), %rdi -# CHECK-NEXT: 2 5 0.33 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movswq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movslq %eax, %rdi -# CHECK-NEXT: 2 5 0.33 * movslq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movslq (%rax), %rdi # CHECK-NEXT: 1 4 1.00 mulb %dil # CHECK-NEXT: 2 8 1.00 * mulb (%rax) # CHECK-NEXT: 1 3 1.00 mulw %si @@ -1696,7 +1696,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 116.00 116.00 116.00 197.00 231.00 213.00 197.00 392.00 - - - - 34.00 +# CHECK-NEXT: 116.00 116.00 116.00 195.25 229.25 211.25 195.25 392.00 - - - - 34.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -1999,26 +1999,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movsbw 
%al, %di # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzbw %al, %di -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movsbw (%rax), %di -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movzbw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movsbw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzbw (%rax), %di # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movsbl %al, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzbl %al, %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movsbl (%rax), %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzbl (%rax), %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movsbq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movzbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movsbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzbq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movswl %ax, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzwl %ax, %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movswl (%rax), %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzwl (%rax), %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movswq %ax, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movswq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movzwq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movswq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzwq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movslq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - 
- - - - - - - - movslq (%rax), %rdi # CHECK-NEXT: - - - - 1.00 - - - - - - - 1.00 mulb %dil # CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - 1.00 mulb (%rax) # CHECK-NEXT: - - - - 1.00 - - - - - - - 1.00 mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s index 0783c5decbb88..9a201b0219784 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s @@ -1706,7 +1706,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 176.00 176.00 176.00 1669.50 1824.50 1704.50 1467.50 - - - - - - - - 175.33 175.33 175.33 109.00 109.00 109.00 99.50 99.50 +# CHECK-NEXT: 176.00 176.00 176.00 1667.25 1822.25 1702.25 1465.25 - - - - - - - - 175.33 175.33 175.33 109.00 109.00 109.00 99.50 99.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2013,22 +2013,22 @@ xorq (%rax), %rdi # CHECK-NEXT: 1.00 1.00 1.00 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbw (%rax), %di # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movsbl %al, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzbl %al, %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbl (%rax), %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbl (%rax), %edi # CHECK-NEXT: - - - 
0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movswl %ax, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzwl %ax, %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswl (%rax), %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwl (%rax), %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movswq %ax, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.33 0.33 
0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movslq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movslq (%rax), %rdi # CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - mulb %dil # CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulb (%rax) # CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - mulw %si From 49143f9d1471e3f01a6d32001676b29274298e22 Mon Sep 17 00:00:00 2001 From: luxufan Date: Sun, 6 Nov 2022 10:44:50 +0800 Subject: [PATCH 357/516] [IndVars] Forget the SCEV when the instruction has been sunk. In the past, the SCEV expression of the sunk instruction was not forgotten. This led to incorrect block dispositions after the instruction was sunk. Fixes https://github.com/llvm/llvm-project/issues/58662 Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D137060 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +- .../IndVarSimplify/scev-invalidation.ll | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index f6431b77d8027..fdc1232ad4fa6 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1281,7 +1281,7 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { MadeAnyChanges = true; ToMove->moveBefore(*ExitBlock, InsertPt); - SE->forgetBlockAndLoopDispositions(ToMove); + SE->forgetValue(ToMove); if (Done) break; InsertPt = ToMove->getIterator(); } diff --git a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll index 0dac4f3eb13a0..5cecdac7d99f4 100644 --- a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll +++ b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll @@ -70,3 +70,57 @@ exit: %res = add i32 %c.ext, %or ret i32 %res } + 
+define i8 @l(i32 %inc, i1 %tobool.not.i) { +; CHECK-LABEL: @l( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[C_05_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[INNER]] ], [ 0, [[OUTER_HEADER]] ] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[C_05_I]], 1 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[C_05_I]], 0 +; CHECK-NEXT: [[OR_COND_I:%.*]] = select i1 [[CMP_I]], i1 true, i1 [[TOBOOL_NOT_I:%.*]] +; CHECK-NEXT: br i1 [[OR_COND_I]], label [[OUTER_LATCH:%.*]], label [[INNER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[C_05_I_LCSSA:%.*]] = phi i32 [ [[C_05_I]], [[INNER]] ] +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[INNER]] ] +; CHECK-NEXT: [[AND:%.*]] = and i32 1, [[INC:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[AND]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C_05_I_LCSSA]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[OUTER_HEADER]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret i8 0 +; +entry: + br label %outer.header + +outer.header: ; preds = %h.exit, %entry + %outer.iv = phi i16 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %and = and i32 1, %inc + %conv = sext i16 %outer.iv to i32 + br label %inner + +inner: ; preds = %while.body.i, %for.cond + %c.05.i = phi i32 [ %inc.i, %inner ], [ 0, %outer.header ] + %i.addr.04.i = phi i32 [ 0, %inner ], [ %conv, %outer.header ] + %inc.i = add nsw i32 %c.05.i, 1 + %cmp.i = icmp sgt i32 %c.05.i, 0 + %or.cond.i = select i1 %cmp.i, i1 true, i1 %tobool.not.i + br i1 %or.cond.i, label %outer.latch, label %inner + +outer.latch: ; preds = %while.body.i + %lcssa = phi i32 [ 0, %inner ] + %0 = trunc i32 %and to i8 + %1 = trunc i32 %c.05.i to i8 + %2 = sub i8 %0, %1 + %tobool.not = icmp eq i8 %2, 0 + %outer.iv.next = add i16 %outer.iv, 1 + br i1 %tobool.not, label %outer.header, 
label %if.then + +if.then: ; preds = %h.exit + ret i8 0 +} From e0ed3d5d2e93b91ee7983a2ef630ef32fe4f5b71 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 6 Nov 2022 08:16:36 -0800 Subject: [PATCH 358/516] Online sync-ups: add SYCL working group sync details. --- llvm/docs/GettingInvolved.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 7315271b7fe37..889fb91ba2f63 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -227,6 +227,10 @@ what to add to your calendar invite. - Every week on Thursday - - `Meeting details/agenda `__ + * - SYCL Upstream Working Group + - Every 2 weeks on Mondays + - `gcal `__ + - `Meeting details/agenda `__ .. _office-hours: From f5a2ef80fa47d657877d5be314ce29ff7195d887 Mon Sep 17 00:00:00 2001 From: v1nh1shungry Date: Sun, 6 Nov 2022 18:30:41 +0100 Subject: [PATCH 359/516] [clangd] Fix the code action `RemoveUsingNamespace` Avoid adding qualifiers before C++ operators declared in a non-class context Reviewed By: tom-anders Differential Revision: https://reviews.llvm.org/D137494 --- .../refactor/tweaks/RemoveUsingNamespace.cpp | 7 ++++++ .../tweaks/RemoveUsingNamespaceTests.cpp | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp index 8df7a448c4383..93fdbb9486cc7 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp @@ -155,6 +155,13 @@ Expected RemoveUsingNamespace::apply(const Selection &Inputs) { if (!visibleContext(T->getDeclContext()) ->Equals(TargetDirective->getNominatedNamespace())) return; + // Avoid adding qualifiers before operators, e.g. 
+ // using namespace std; + // cout << "foo"; // Must not be changed to std::cout std:: << "foo" + // FIXME: User-defined literals are not handled + if (T->isInIdentifierNamespace( + Decl::IdentifierNamespace::IDNS_NonMemberOperator)) + return; } SourceLocation Loc = Ref.NameLoc; if (Loc.isMacroID()) { diff --git a/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp index 59788e75d1698..3449c6475e3fc 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp @@ -226,6 +226,29 @@ TEST_F(RemoveUsingNamespaceTest, All) { int main() { std::vector V; } + )cpp"}, + {// Does not qualify operators declared in a non-class context + R"cpp( + namespace ns { + struct Foo {}; + void operator+(const Foo &, int) {} + } + using namespace n^s; + int main() { + Foo foo; + foo + 10; + } + )cpp", + R"cpp( + namespace ns { + struct Foo {}; + void operator+(const Foo &, int) {} + } + + int main() { + ns::Foo foo; + foo + 10; + } )cpp"}}; for (auto C : Cases) EXPECT_EQ(C.second, apply(C.first)) << C.first; From d42357007da6f70e91553685324b404428fcd290 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 08:39:41 -0800 Subject: [PATCH 360/516] [lld] Use llvm::reverse (NFC) --- lld/ELF/Writer.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 918490b972efb..2d99f6d6f7d5e 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1794,10 +1794,9 @@ static void removeUnusedSyntheticSections() { // all regular ones. Reverse iterate to find the first synthetic section // after a non-synthetic one which will be our starting point. 
auto start = - std::find_if( - ctx.inputSections.rbegin(), ctx.inputSections.rend(), - [](InputSectionBase *s) { return !isa(s); }) - .base(); + llvm::find_if(llvm::reverse(ctx.inputSections), [](InputSectionBase *s) { + return !isa(s); + }).base(); // Remove unused synthetic sections from ctx.inputSections; DenseSet unused; From 6a7a1188d3bb639eb0c817484075e6c116a88ac2 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 3 Nov 2022 19:52:16 +0000 Subject: [PATCH 361/516] Apply clang-tidy fixes for llvm-else-after-return in VectorToGPU.cpp (NFC) --- mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 01654fdd6024a..b64b0d88a3e37 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -212,7 +212,7 @@ extractStridedSliceSupportsMMAMatrixType(vector::ExtractStridedSliceOp op) { if (warpMatrixInfo->operandRole == nvgpu::MatMulOperandRole::B) return (op->getResult(0).getType().cast() == (*contractOp).getRhs().getType().cast()); - else if (warpMatrixInfo->operandRole == nvgpu::MatMulOperandRole::C) + if (warpMatrixInfo->operandRole == nvgpu::MatMulOperandRole::C) return (op->getResult(0).getType().cast() == (*contractOp).getAcc().getType().cast()); @@ -768,7 +768,7 @@ convertExtractStridedSlice(vector::ExtractStridedSliceOp op, if (offsets[0] && offsets[1]) return op->emitError() << "Slicing fragments in 2D is not supported. 
"; - else if (offsets[0]) + if (offsets[0]) sliceOffset[0] = (warpVectorShape[0] / offsets[0]); else if (offsets[1]) sliceOffset[0] = (warpVectorShape[1] / offsets[1]); From c310a6dda2f2f2a7011eb817431a0bb6774717c9 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 3 Nov 2022 20:07:33 +0000 Subject: [PATCH 362/516] Apply clang-tidy fixes for readability-simplify-boolean-expr in GPUDialect.cpp (NFC) --- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 3eee6081e7eef..2c30425f72458 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -311,13 +311,10 @@ static void printAsyncDependencies(OpAsmPrinter &printer, Operation *op, static bool verifyReduceOpAndType(gpu::AllReduceOperation opName, Type resType) { - if ((opName == gpu::AllReduceOperation::AND || - opName == gpu::AllReduceOperation::OR || - opName == gpu::AllReduceOperation::XOR) && - !resType.isa()) - return false; - - return true; + return !((opName == gpu::AllReduceOperation::AND || + opName == gpu::AllReduceOperation::OR || + opName == gpu::AllReduceOperation::XOR) && + !resType.isa()); } LogicalResult gpu::AllReduceOp::verifyRegions() { From 7987a0da9ff21a9cc7679f955ce14a4cbb4f57d6 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 6 Nov 2022 20:23:05 +0000 Subject: [PATCH 363/516] Add missing dependent FuncDialect for `convert-async-to-llvm` pass Fixes #58805 --- mlir/include/mlir/Conversion/Passes.td | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index cef82f1e29ff1..5631d60025fa8 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -155,6 +155,7 @@ def ConvertAsyncToLLVM : Pass<"convert-async-to-llvm", "ModuleOp"> { "arith::ArithDialect", "async::AsyncDialect", 
"LLVM::LLVMDialect", + "func::FuncDialect", ]; } From b84fd822fa7eeaec2bb084a26caa9e41f3495923 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Sun, 6 Nov 2022 15:07:42 -0800 Subject: [PATCH 364/516] Add boundary check for ASTUnresolvedSet::erase When compile following code with clang (Debug build), Assertion will be triggered. ``` struct A { struct Nested {}; operator Nested*() {return 0;}; }; struct B : A { using A::operator typename A::Nested*; operator typename A::Nested *() { struct A * thi = this; return *thi; }; }; ``` The assertion fail is caused by: `void erase(unsigned I) { Decls[I] = Decls.pop_back_val(); }` when size of `Decls` is 1 before erase. Reviewed By: rjmccall, MaskRay Differential Revision: https://reviews.llvm.org/D137263 --- clang/include/clang/AST/ASTUnresolvedSet.h | 7 ++++++- clang/test/SemaCXX/using-decl-templates.cpp | 22 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/AST/ASTUnresolvedSet.h b/clang/include/clang/AST/ASTUnresolvedSet.h index 8d2b23b3539a2..398ffb188c95b 100644 --- a/clang/include/clang/AST/ASTUnresolvedSet.h +++ b/clang/include/clang/AST/ASTUnresolvedSet.h @@ -69,7 +69,12 @@ class ASTUnresolvedSet { return false; } - void erase(unsigned I) { Decls[I] = Decls.pop_back_val(); } + void erase(unsigned I) { + if (I == Decls.size() - 1) + Decls.pop_back(); + else + Decls[I] = Decls.pop_back_val(); + } void clear() { Decls.clear(); } diff --git a/clang/test/SemaCXX/using-decl-templates.cpp b/clang/test/SemaCXX/using-decl-templates.cpp index 73d9bc3e774cb..77dc596fdfc9f 100644 --- a/clang/test/SemaCXX/using-decl-templates.cpp +++ b/clang/test/SemaCXX/using-decl-templates.cpp @@ -102,6 +102,28 @@ struct Derived : Base { // expected-note {{requested here}} }; } // namespace DontDiagnoseInvalidTest +namespace shadow_nested_operator { +template +struct A { + struct Nested {}; + operator Nested*() {return 0;}; +}; + +template +struct B : A { + using A::operator typename 
A::Nested*; + operator typename A::Nested *() { + struct A * thi = this; + return *thi; + }; +}; + +int foo () { + struct B b; + auto s = *b; +} +} // namespace shadow_nested_operator + namespace func_templ { namespace sss { double foo(int, double); From a86cfceb44114b3eb871c808d0caca9d512ec5b3 Mon Sep 17 00:00:00 2001 From: Andreas Hollandt Date: Sun, 6 Nov 2022 15:13:32 -0800 Subject: [PATCH 365/516] [ELF] Add gdb index time trace Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D135659 --- lld/ELF/SyntheticSections.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index cb98a2daf9428..316d89411cdad 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2835,6 +2835,8 @@ createSymbols( // Returns a newly-created .gdb_index section. template GdbIndexSection *GdbIndexSection::create() { + llvm::TimeTraceScope timeScope("Create gdb index"); + // Collect InputFiles with .debug_info. See the comment in // LLDDwarfObj::LLDDwarfObj. If we do lightweight parsing in the future, // note that isec->data() may uncompress the full content, which should be From 7aa90b21b453d1ca52fdfccfd7e01e61d9e5b1f1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 6 Nov 2022 16:16:50 -0800 Subject: [PATCH 366/516] [PowerPC] Replace __ppc64__ with __powerpc64__ The lowercase __ppc64__ is not defined by non-darwin GCC, therefore it lures users to write code which is not portable to GCC. Migrate to __powerpc64__ in preparation for undefining __ppc64__. __powerpc64__ is much more common than __PPC64__. 
--- clang/lib/Headers/ppc_wrappers/emmintrin.h | 4 ++-- clang/lib/Headers/ppc_wrappers/mm_malloc.h | 2 +- clang/lib/Headers/ppc_wrappers/mmintrin.h | 4 ++-- clang/lib/Headers/ppc_wrappers/pmmintrin.h | 4 ++-- clang/lib/Headers/ppc_wrappers/smmintrin.h | 4 ++-- clang/lib/Headers/ppc_wrappers/tmmintrin.h | 4 ++-- clang/lib/Headers/ppc_wrappers/xmmintrin.h | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/lib/Headers/ppc_wrappers/emmintrin.h b/clang/lib/Headers/ppc_wrappers/emmintrin.h index a4c458a41bcf4..0814ea5593bad 100644 --- a/clang/lib/Headers/ppc_wrappers/emmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/emmintrin.h @@ -36,7 +36,7 @@ #ifndef EMMINTRIN_H_ #define EMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include @@ -2262,7 +2262,7 @@ extern __inline __m128d #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* EMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h index 65920917f3bdc..7c1e625e44d51 100644 --- a/clang/lib/Headers/ppc_wrappers/mm_malloc.h +++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h @@ -10,7 +10,7 @@ #ifndef _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include diff --git a/clang/lib/Headers/ppc_wrappers/mmintrin.h b/clang/lib/Headers/ppc_wrappers/mmintrin.h index 70e8b81e11ee6..0be3af2b0bd72 100644 --- a/clang/lib/Headers/ppc_wrappers/mmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/mmintrin.h @@ -35,7 +35,7 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include @@ -1447,7 +1447,7 
@@ extern __inline __m64 #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/clang/lib/Headers/ppc_wrappers/pmmintrin.h b/clang/lib/Headers/ppc_wrappers/pmmintrin.h index fda39edbaa223..db128192abfb4 100644 --- a/clang/lib/Headers/ppc_wrappers/pmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/pmmintrin.h @@ -39,7 +39,7 @@ #ifndef PMMINTRIN_H_ #define PMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* We need definitions from the SSE2 and SSE header files*/ @@ -139,7 +139,7 @@ extern __inline __m128i #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* PMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h index 6fe6c8a93d9ba..6fe6d2a157a59 100644 --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -29,7 +29,7 @@ #ifndef SMMINTRIN_H_ #define SMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include @@ -657,7 +657,7 @@ extern __inline __m128i #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* SMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/tmmintrin.h b/clang/lib/Headers/ppc_wrappers/tmmintrin.h index 6185ca1e7e710..92f08676d2dfa 100644 --- a/clang/lib/Headers/ppc_wrappers/tmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/tmmintrin.h @@ -25,7 +25,7 @@ #ifndef TMMINTRIN_H_ #define TMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || 
defined(__FreeBSD__) || defined(_AIX)) #include @@ -447,7 +447,7 @@ extern __inline __m64 #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* TMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h index ee0032ca159cb..9dd21b65c2f70 100644 --- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h @@ -35,7 +35,7 @@ #ifndef XMMINTRIN_H_ #define XMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* Define four value permute mask */ @@ -1821,7 +1821,7 @@ extern __inline void #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* XMMINTRIN_H_ */ From 6c927f2a8659a8ca35d223b7ccc0c0a03e8a376a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 6 Nov 2022 17:29:44 -0800 Subject: [PATCH 367/516] Canonicalize PowerPC detection macros to __powerpc__ --- llvm/include/llvm/Support/Threading.h | 5 ++--- llvm/lib/Support/Host.cpp | 4 ++-- llvm/lib/Support/Unix/Memory.inc | 10 +++------- llvm/unittests/ADT/PackedVectorTest.cpp | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h index 44e133de854b8..7f2708ddbb467 100644 --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -26,14 +26,13 @@ #define LLVM_THREADING_USE_STD_CALL_ONCE 1 #elif defined(LLVM_ON_UNIX) && \ (defined(_LIBCPP_VERSION) || \ - !(defined(__NetBSD__) || defined(__OpenBSD__) || \ - (defined(__ppc__) || defined(__PPC__)))) + !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__powerpc__))) // std::call_once from libc++ is used on all Unix platforms. 
Other // implementations like libstdc++ are known to have problems on NetBSD, // OpenBSD and PowerPC. #define LLVM_THREADING_USE_STD_CALL_ONCE 1 #elif defined(LLVM_ON_UNIX) && \ - ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__)) + (defined(__powerpc__) && defined(__LITTLE_ENDIAN__)) #define LLVM_THREADING_USE_STD_CALL_ONCE 1 #else #define LLVM_THREADING_USE_STD_CALL_ONCE 0 diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index bd8a206b84448..732aa83090439 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1251,7 +1251,7 @@ StringRef sys::getHostCPUName() { return "generic"; } -#elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) +#elif defined(__APPLE__) && defined(__powerpc__) StringRef sys::getHostCPUName() { host_basic_info_data_t hostInfo; mach_msg_type_number_t infoCount; @@ -1295,7 +1295,7 @@ StringRef sys::getHostCPUName() { return "generic"; } -#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) +#elif defined(__linux__) && defined(__powerpc__) StringRef sys::getHostCPUName() { std::unique_ptr P = getProcCpuinfoContent(); StringRef Content = P ? P->getBuffer() : ""; diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc index 5e008069dd989..e4454fe8c517d 100644 --- a/llvm/lib/Support/Unix/Memory.inc +++ b/llvm/lib/Support/Unix/Memory.inc @@ -50,8 +50,7 @@ static int getPosixProtectionFlags(unsigned Flags) { llvm::sys::Memory::MF_EXEC: return PROT_READ | PROT_WRITE | PROT_EXEC; case llvm::sys::Memory::MF_EXEC: -#if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC)) +#if defined(__FreeBSD__) || defined(__powerpc__) // On PowerPC, having an executable page that has no read permission // can have unintended consequences. 
The function InvalidateInstruction- // Cache uses instructions dcbf and icbi, both of which are treated by @@ -213,9 +212,7 @@ void Memory::InvalidateInstructionCache(const void *Addr, // icache invalidation for PPC and ARM. #if defined(__APPLE__) -# if (defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC) || defined(__arm__) || \ - defined(__arm64__)) +# if (defined(__powerpc__) || defined(__arm__) || defined(__arm64__)) sys_icache_invalidate(const_cast(Addr), Len); # endif @@ -226,8 +223,7 @@ void Memory::InvalidateInstructionCache(const void *Addr, #else -# if (defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__) +# if defined(__powerpc__) && defined(__GNUC__) const size_t LineSize = 32; const intptr_t Mask = ~(LineSize - 1); diff --git a/llvm/unittests/ADT/PackedVectorTest.cpp b/llvm/unittests/ADT/PackedVectorTest.cpp index 24df398934670..b4e017971efac 100644 --- a/llvm/unittests/ADT/PackedVectorTest.cpp +++ b/llvm/unittests/ADT/PackedVectorTest.cpp @@ -8,7 +8,7 @@ // BitVectorTest tests fail on PowerPC for unknown reasons, so disable this // as well since it depends on a BitVector. -#ifndef __ppc__ +#ifndef __powerpc__ #include "llvm/ADT/PackedVector.h" #include "gtest/gtest.h" From 0f91ec243d6ddb1f8b1f80d15f9003e5f06b0a21 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 6 Nov 2022 17:34:04 -0800 Subject: [PATCH 368/516] [builtins] Canonicalize PowerPC detection macros to __powerpc__ The lowercase __ppc__ is not defined by Linux GCC, therefore it lures users to write code which is not portable to GCC. Migrate to __powerpc__ in preparation for undefining __ppc__. __powerpc__ is much more common than __PPC__. 
--- compiler-rt/lib/builtins/apple_versioning.c | 4 ++-- compiler-rt/lib/builtins/trampoline_setup.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/builtins/apple_versioning.c b/compiler-rt/lib/builtins/apple_versioning.c index f87b42820c154..83d419418f241 100644 --- a/compiler-rt/lib/builtins/apple_versioning.c +++ b/compiler-rt/lib/builtins/apple_versioning.c @@ -138,13 +138,13 @@ NOT_HERE_BEFORE_10_6(__udivti3) NOT_HERE_BEFORE_10_6(__umoddi3) NOT_HERE_BEFORE_10_6(__umodti3) -#if __ppc__ +#if __powerpc__ NOT_HERE_BEFORE_10_6(__gcc_qadd) NOT_HERE_BEFORE_10_6(__gcc_qdiv) NOT_HERE_BEFORE_10_6(__gcc_qmul) NOT_HERE_BEFORE_10_6(__gcc_qsub) NOT_HERE_BEFORE_10_6(__trampoline_setup) -#endif // __ppc__ +#endif // __powerpc__ NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange) NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1) diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c index a62431723d787..844eb27944142 100644 --- a/compiler-rt/lib/builtins/trampoline_setup.c +++ b/compiler-rt/lib/builtins/trampoline_setup.c @@ -16,7 +16,7 @@ extern void __clear_cache(void *start, void *end); // which loads r11 with a pointer to the outer function's locals // and then jumps to the target nested function. 
-#if __ppc__ && !defined(__powerpc64__) +#if __powerpc__ && !defined(__powerpc64__) COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, int trampSizeAllocated, const void *realFunc, void *localsPtr) { @@ -40,4 +40,4 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, // clear instruction cache __clear_cache(trampOnStack, &trampOnStack[10]); } -#endif // __ppc__ && !defined(__powerpc64__) +#endif // __powerpc__ && !defined(__powerpc64__) From 34f687cbe468e8044e8d94c977a2e47cbd3f3799 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 6 Nov 2022 17:38:57 -0800 Subject: [PATCH 369/516] [test] Canonicalize PowerPC detection macros to __powerpc__ --- clang/test/Sema/128bitfloat.cpp | 4 ++-- clang/test/Sema/attr-mode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/test/Sema/128bitfloat.cpp b/clang/test/Sema/128bitfloat.cpp index 6b9d63e6af4cf..b98b42496e8db 100644 --- a/clang/test/Sema/128bitfloat.cpp +++ b/clang/test/Sema/128bitfloat.cpp @@ -7,7 +7,7 @@ #if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__) -#if defined(__ppc__) +#if defined(__powerpc__) template struct __is_float128 { static constexpr bool value = false; }; template <> struct __is_float128<__float128> { static constexpr bool value = true; }; static_assert(__is_float128<__ieee128>::value, "__ieee128 aliases to __float128"); @@ -45,7 +45,7 @@ int g(int x, __float128 *y) { // expected-error {{__float128 is not supported o #endif #endif -#ifdef __ppc__ +#ifdef __powerpc__ __ibm128 i; template <> struct __is_floating_point_helper<__ibm128> {}; int w(int x, __ibm128 *y) { diff --git a/clang/test/Sema/attr-mode.c b/clang/test/Sema/attr-mode.c index 71d82a20f66d0..5e99c4583155a 100644 --- a/clang/test/Sema/attr-mode.c +++ b/clang/test/Sema/attr-mode.c @@ -46,7 +46,7 @@ typedef _Complex double c32 __attribute((mode(SC))); int c32_test[sizeof(c32) == 8 ? 
1 : -1]; typedef _Complex float c64 __attribute((mode(DC))); -#if !defined(__ppc__) && !defined(__mips__) // Note, 'XC' mode is illegal for PPC64 and MIPS machines. +#if !defined(__powerpc__) && !defined(__mips__) // Note, 'XC' mode is illegal for PPC64 and MIPS machines. typedef _Complex float c80 __attribute((mode(XC))); #endif From 7c50bcb441707f862cd7fcd07d81fbcdc29e98dc Mon Sep 17 00:00:00 2001 From: wangpc Date: Mon, 7 Nov 2022 10:40:19 +0800 Subject: [PATCH 370/516] [RISCV] Support -mcpu/mtune=native We may need hosted Clang/LLVM to compile and `getHostCPUName` can be used for native detection. Tests are added in riscv-cpus.c just like what AArch64/PPC have done. Reviewed By: kito-cheng Differential Revision: https://reviews.llvm.org/D136930 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 43 +++++++++++++++++----- clang/lib/Driver/ToolChains/Arch/RISCV.h | 2 + clang/lib/Driver/ToolChains/Clang.cpp | 5 ++- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 +-- clang/test/Driver/riscv-cpus.c | 14 +++++-- 6 files changed, 53 insertions(+), 17 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7bb1405c131ab..72b9a52c7cd0d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -677,6 +677,7 @@ RISC-V Support in Clang ----------------------- - ``sifive-7-rv32`` and ``sifive-7-rv64`` are no longer supported for ``-mcpu``. Use ``sifive-e76``, ``sifive-s76``, or ``sifive-u74`` instead. +- Native detections via ``-mcpu=native`` and ``-mtune=native`` are supported.
X86 Support in Clang -------------------- diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index c845e69c14e84..081e8ff4a168f 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Host.h" #include "llvm/Support/RISCVISAInfo.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" @@ -48,16 +49,12 @@ static bool getArchFeatures(const Driver &D, StringRef Arch, } // Get features except standard extension feature -static void getRISCFeaturesFromMcpu(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args, - const llvm::opt::Arg *A, StringRef Mcpu, +static bool getRISCFeaturesFromMcpu(const llvm::Triple &Triple, StringRef Mcpu, std::vector &Features) { bool Is64Bit = Triple.isRISCV64(); llvm::RISCV::CPUKind CPUKind = llvm::RISCV::parseCPUKind(Mcpu); - if (!llvm::RISCV::checkCPUKind(CPUKind, Is64Bit) || - !llvm::RISCV::getCPUFeaturesExceptStdExt(CPUKind, Features)) { - D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); - } + return llvm::RISCV::checkCPUKind(CPUKind, Is64Bit) && + llvm::RISCV::getCPUFeaturesExceptStdExt(CPUKind, Features); } void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, @@ -70,8 +67,14 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, // If users give march and mcpu, get std extension feature from MArch // and other features (ex. 
mirco architecture feature) from mcpu - if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - getRISCFeaturesFromMcpu(D, Triple, Args, A, A->getValue(), Features); + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + StringRef CPU = A->getValue(); + if (CPU == "native") + CPU = llvm::sys::getHostCPUName(); + if (!getRISCFeaturesFromMcpu(Triple, CPU, Features)) + D.Diag(clang::diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << CPU; + } // Handle features corresponding to "-ffixed-X" options if (Args.hasArg(options::OPT_ffixed_x1)) @@ -260,7 +263,10 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args, // 2. Get march (isa string) based on `-mcpu=` if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { - StringRef MArch = llvm::RISCV::getMArchFromMcpu(A->getValue()); + StringRef CPU = A->getValue(); + if (CPU == "native") + CPU = llvm::sys::getHostCPUName(); + StringRef MArch = llvm::RISCV::getMArchFromMcpu(CPU); // Bypass if target cpu's default march is empty. if (MArch != "") return MArch; @@ -299,3 +305,20 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args, return "rv64imafdc"; } } + +std::string riscv::getRISCVTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple) { + std::string CPU; + // If we have -mcpu, use that. + if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) + CPU = A->getValue(); + + // Handle CPU name is 'native'. + if (CPU == "native") + CPU = llvm::sys::getHostCPUName(); + + if (!CPU.empty()) + return CPU; + + return Triple.isRISCV64() ? 
"generic-rv64" : "generic-rv32"; +} diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.h b/clang/lib/Driver/ToolChains/Arch/RISCV.h index d4a519cdab340..c30f1098ddda5 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.h +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.h @@ -26,6 +26,8 @@ StringRef getRISCVABI(const llvm::opt::ArgList &Args, const llvm::Triple &Triple); StringRef getRISCVArch(const llvm::opt::ArgList &Args, const llvm::Triple &Triple); +std::string getRISCVTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple); } // end namespace riscv } // namespace tools } // end namespace driver diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f87325141b7eb..afb92dae27d35 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2187,7 +2187,10 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args, if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { CmdArgs.push_back("-tune-cpu"); - CmdArgs.push_back(A->getValue()); + if (strcmp(A->getValue(), "native") == 0) + CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName())); + else + CmdArgs.push_back(A->getValue()); } } diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 1fcd160b76fab..6c397c697739d 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -12,6 +12,7 @@ #include "Arch/M68k.h" #include "Arch/Mips.h" #include "Arch/PPC.h" +#include "Arch/RISCV.h" #include "Arch/Sparc.h" #include "Arch/SystemZ.h" #include "Arch/VE.h" @@ -432,9 +433,7 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, return "ck810"; case llvm::Triple::riscv32: case llvm::Triple::riscv64: - if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - return A->getValue(); - return ""; + return riscv::getRISCVTargetCPU(Args, T); case llvm::Triple::bpfel: case llvm::Triple::bpfeb: diff --git 
a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c index 4858455d5775f..32d8569fe06fb 100644 --- a/clang/test/Driver/riscv-cpus.c +++ b/clang/test/Driver/riscv-cpus.c @@ -7,6 +7,10 @@ // MCPU-ROCKET64: "-nostdsysteminc" "-target-cpu" "rocket-rv64" // MCPU-ROCKET64: "-target-feature" "+64bit" +// We cannot check much for -mcpu=native, but it should be replaced by a valid CPU string. +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -mcpu=native | FileCheck -check-prefix=MCPU-NATIVE %s +// MCPU-NATIVE-NOT: "-target-cpu" "native" + // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mtune=rocket-rv32 | FileCheck -check-prefix=MTUNE-ROCKET32 %s // MTUNE-ROCKET32: "-tune-cpu" "rocket-rv32" @@ -26,6 +30,10 @@ // RUN: %clang --target=riscv64 -### -c %s 2>&1 -mtune=rocket | FileCheck -check-prefix=MTUNE-ROCKET-64 %s // MTUNE-ROCKET-64: "-tune-cpu" "rocket" +// We cannot check much for -mtune=native, but it should be replaced by a valid CPU string. +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -mtune=native | FileCheck -check-prefix=MTUNE-NATIVE %s +// MTUNE-NATIVE-NOT: "-tune-cpu" "native" + // mcpu with default march // RUN: %clang --target=riscv64 -### -c %s 2>&1 -mcpu=sifive-e20 | FileCheck -check-prefix=MCPU-SIFIVE-E20 %s // MCPU-SIFIVE-E20: "-nostdsysteminc" "-target-cpu" "sifive-e20" @@ -130,10 +138,10 @@ // Check failed cases // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv321 | FileCheck -check-prefix=FAIL-MCPU-NAME %s -// FAIL-MCPU-NAME: error: the clang compiler does not support '-mcpu=generic-rv321' +// FAIL-MCPU-NAME: error: unsupported argument 'generic-rv321' to option '-mcpu=' // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv32 -march=rv64i | FileCheck -check-prefix=MISMATCH-ARCH %s -// MISMATCH-ARCH: error: the clang compiler does not support '-mcpu=generic-rv32' +// MISMATCH-ARCH: error: unsupported argument 'generic-rv32' to option '-mcpu=' // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv64 | 
FileCheck -check-prefix=MISMATCH-MCPU %s -// MISMATCH-MCPU: error: the clang compiler does not support '-mcpu=generic-rv64' +// MISMATCH-MCPU: error: unsupported argument 'generic-rv64' to option '-mcpu=' From 7c80e7a2943ade7b999c14ad582b52035098e5ae Mon Sep 17 00:00:00 2001 From: Sean Cross Date: Sun, 6 Nov 2022 19:02:08 -0800 Subject: [PATCH 371/516] [builtins] Check __SIZEOF_INT128__ for CRT_HAS_128BIT When building libstd on Rust for a riscv32 target, `compiler-rt` fails to build with the following error: ``` running: "riscv-none-elf-gcc" "-O3" "-ffunction-sections" "-fdata-sections" "-fPIC" "-march=rv32imac" "-mabi=ilp32" "-mcmodel=medany" "-fno-builtin" "-fvisibility=hidden" "-ffreestanding" "-fomit-frame-pointer" "-ffile-prefix-map=E:\\Code\\Xous\\rust-next\\src\\llvm-project\\compiler-rt=." "-DVISIBILITY_HIDDEN" "-o" "E:\\Code\\Xous\\rust-next\\target\\riscv32imac-unknown-xous-elf\\release\\build\\compiler_builtins-b0d7dd25c6999904\\out\\absvdi2.o" "-c" "E:\\Code\\Xous\\rust-next\\src\\llvm-project\\compiler-rt\\lib/builtins\\absvdi2.c" cargo:warning=In file included from E:\Code\Xous\rust-next\src\llvm-project\compiler-rt\lib/builtins\int_lib.h:99, cargo:warning= from E:\Code\Xous\rust-next\src\llvm-project\compiler-rt\lib/builtins\absvdi2.c:13: cargo:warning=E:\Code\Xous\rust-next\src\llvm-project\compiler-rt\lib/builtins\int_types.h:79:1: error: unable to emulate 'TI' cargo:warning= 79 | typedef int ti_int __attribute__((mode(TI))); cargo:warning= | ^~~~~~~ cargo:warning=E:\Code\Xous\rust-next\src\llvm-project\compiler-rt\lib/builtins\int_types.h:80:1: error: unable to emulate 'TI' cargo:warning= 80 | typedef unsigned tu_int __attribute__((mode(TI))); cargo:warning= | ^~~~~~~ exit code: 1 ``` This is because 128-bit support is gated on the `__riscv` compiler macro which is valid for both rv32 and rv64. However, only rv64 has 128-bit support, so this fails when building for rv32. 
Add a check for `__SIZEOF_INT128__` to ensure that 128-bit support is only enabled on targets that support it. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D137485 --- compiler-rt/lib/builtins/int_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h index 7a72de4806764..e94d3154c6d4e 100644 --- a/compiler-rt/lib/builtins/int_types.h +++ b/compiler-rt/lib/builtins/int_types.h @@ -64,7 +64,7 @@ typedef union { } udwords; #if defined(__LP64__) || defined(__wasm__) || defined(__mips64) || \ - defined(__riscv) || defined(_WIN64) + defined(__SIZEOF_INT128__) || defined(_WIN64) #define CRT_HAS_128BIT #endif From 7c2b76f7cfe7493d7e437c55905c3a6b6bc4d574 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 19:53:02 -0800 Subject: [PATCH 372/516] [PowerPC] Use llvm::is_contained (NFC) --- llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 5320ae0da031a..87d62f1619bc3 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -275,13 +275,9 @@ void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, // merge the two-predecessor loop header with its successor. If the // successor happens to be a header of nest loop, then we will have a header // which has more than 2 predecessors. 
- assert(std::find(ML->getHeader()->predecessors().begin(), - ML->getHeader()->predecessors().end(), - Exiting) != ML->getHeader()->predecessors().end() && + assert(llvm::is_contained(ML->getHeader()->predecessors(), Exiting) && "Loop latch is not loop header predecessor!"); - assert(std::find(ML->getHeader()->predecessors().begin(), - ML->getHeader()->predecessors().end(), - Preheader) != ML->getHeader()->predecessors().end() && + assert(llvm::is_contained(ML->getHeader()->predecessors(), Preheader) && "Loop preheader is not loop header predecessor!"); PHIMIB.addReg(ADDIDef).addMBB(Exiting); From 585e35a998e5093e920198564f3bb3395bd47ec4 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 19:56:15 -0800 Subject: [PATCH 373/516] [mlir] Use llvm::is_contained (NFC) --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 684b911f55d45..3f65c1ce0107f 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -1373,7 +1373,7 @@ LogicalResult PerformConcurrentlyOp::verify() { // Verify that inserts are into out block arguments. 
Value dest = cast(op).getDest(); ArrayRef regionOutArgs = foreachThreadOp.getRegionOutArgs(); - if (llvm::find(regionOutArgs, dest) == regionOutArgs.end()) + if (!llvm::is_contained(regionOutArgs, dest)) return op.emitOpError("may only insert into an output block argument"); } return success(); From c63465bd0655686c44d59ffa3978cdc74119c4d8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 19:58:51 -0800 Subject: [PATCH 374/516] [flang] Use llvm::is_contained (NFC) --- flang/lib/Semantics/compute-offsets.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flang/lib/Semantics/compute-offsets.cpp b/flang/lib/Semantics/compute-offsets.cpp index 237b6b6545d73..779afa6f0bc3d 100644 --- a/flang/lib/Semantics/compute-offsets.cpp +++ b/flang/lib/Semantics/compute-offsets.cpp @@ -174,8 +174,7 @@ void ComputeOffsetsHelper::DoCommonBlock(Symbol &commonBlock) { if (const auto *baseBlock{FindCommonBlockContaining(base)}) { if (baseBlock == &commonBlock) { if (base.offset() != symbol.offset() - dep.offset || - std::find(details.objects().begin(), details.objects().end(), - base) != details.objects().end()) { + llvm::is_contained(details.objects(), base)) { context_.Say(errorSite, "'%s' is storage associated with '%s' by EQUIVALENCE elsewhere in COMMON block /%s/"_err_en_US, symbol.name(), base.name(), commonBlock.name()); From 2a67cc77e2c5227e81f7222258d22a5287af9ffe Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 20:04:53 -0800 Subject: [PATCH 375/516] [Sema] Use llvm::is_contained (NFC) --- clang/lib/Sema/SemaOpenMP.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 9906f636201c2..4f5d393209292 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16585,9 +16585,8 @@ getListOfPossibleValues(OpenMPClauseKind K, unsigned First, unsigned Last, SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); 
unsigned Skipped = Exclude.size(); - auto S = Exclude.begin(), E = Exclude.end(); for (unsigned I = First; I < Last; ++I) { - if (std::find(S, E, I) != E) { + if (llvm::is_contained(Exclude, I)) { --Skipped; continue; } From a5f368af6d9e469ad2a7f83ce5957776746fa0ca Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 20:48:55 -0800 Subject: [PATCH 376/516] [clang-tidy] Use structured bindings (NFC) --- .../clang-tidy/tool/ClangTidyMain.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp index 67d8ccbd6cad4..f089abf69dce6 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp @@ -519,9 +519,9 @@ int clangTidyMain(int argc, const char **argv) { std::vector RawOptions = OptionsProvider->getRawOptions(FilePath); for (const std::string &Check : EnabledChecks) { - for (auto It = RawOptions.rbegin(); It != RawOptions.rend(); ++It) { - if (It->first.Checks && GlobList(*It->first.Checks).contains(Check)) { - llvm::outs() << "'" << Check << "' is enabled in the " << It->second + for (const auto &[Opts, Source] : llvm::reverse(RawOptions)) { + if (Opts.Checks && GlobList(*Opts.Checks).contains(Check)) { + llvm::outs() << "'" << Check << "' is enabled in the " << Source << ".\n"; break; } @@ -557,20 +557,16 @@ int clangTidyMain(int argc, const char **argv) { NamesAndOptions Valid = getAllChecksAndOptions(AllowEnablingAnalyzerAlphaCheckers); bool AnyInvalid = false; - for (const std::pair &OptionWithSource : - RawOptions) { - const ClangTidyOptions &Opts = OptionWithSource.first; + for (const auto &[Opts, Source] : RawOptions) { if (Opts.Checks) - AnyInvalid |= - verifyChecks(Valid.Names, *Opts.Checks, OptionWithSource.second); + AnyInvalid |= verifyChecks(Valid.Names, *Opts.Checks, Source); for (auto Key : Opts.CheckOptions.keys()) { if 
(Valid.Options.contains(Key)) continue; AnyInvalid = true; - auto &Output = - llvm::WithColor::warning(llvm::errs(), OptionWithSource.second) - << "unknown check option '" << Key << '\''; + auto &Output = llvm::WithColor::warning(llvm::errs(), Source) + << "unknown check option '" << Key << '\''; llvm::StringRef Closest = closest(Key, Valid.Options); if (!Closest.empty()) Output << "; did you mean '" << Closest << '\''; From 94186347a10b4bd945edaa21cfc025685e9490d7 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 6 Nov 2022 21:09:20 -0800 Subject: [PATCH 377/516] [clang] Use llvm::reverse (NFC) --- clang/lib/Sema/SemaInit.cpp | 12 ++++++------ clang/lib/Serialization/ASTReader.cpp | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 7ebf6997e27ea..d455bede6babc 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -7596,15 +7596,15 @@ static SourceRange nextPathEntryRange(const IndirectLocalPath &Path, unsigned I, } static bool pathOnlyInitializesGslPointer(IndirectLocalPath &Path) { - for (auto It = Path.rbegin(), End = Path.rend(); It != End; ++It) { - if (It->Kind == IndirectLocalPathEntry::VarInit) + for (const auto &It : llvm::reverse(Path)) { + if (It.Kind == IndirectLocalPathEntry::VarInit) continue; - if (It->Kind == IndirectLocalPathEntry::AddressOf) + if (It.Kind == IndirectLocalPathEntry::AddressOf) continue; - if (It->Kind == IndirectLocalPathEntry::LifetimeBoundCall) + if (It.Kind == IndirectLocalPathEntry::LifetimeBoundCall) continue; - return It->Kind == IndirectLocalPathEntry::GslPointerInit || - It->Kind == IndirectLocalPathEntry::GslReferenceInit; + return It.Kind == IndirectLocalPathEntry::GslPointerInit || + It.Kind == IndirectLocalPathEntry::GslReferenceInit; } return false; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 8708c4d49f8cf..05e6c6ea7952b 100644 --- 
a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -8228,8 +8228,8 @@ namespace serialization { /// Add the given set of methods to the method list. static void addMethodsToPool(Sema &S, ArrayRef Methods, ObjCMethodList &List) { - for (auto I = Methods.rbegin(), E = Methods.rend(); I != E; ++I) - S.addMethodToGlobalList(&List, *I); + for (ObjCMethodDecl *M : llvm::reverse(Methods)) + S.addMethodToGlobalList(&List, M); } void ASTReader::ReadMethodPool(Selector Sel) { From 27f5f33c81d9e253dd5caa31898baf9738a8b068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Fri, 4 Nov 2022 11:55:18 +0100 Subject: [PATCH 378/516] [clang][Interp][NFC] Remove an unused include And an unnecessary private marker. --- clang/lib/AST/Interp/Context.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/AST/Interp/Context.h b/clang/lib/AST/Interp/Context.h index 96e93dbfc48b0..feb809b69bf39 100644 --- a/clang/lib/AST/Interp/Context.h +++ b/clang/lib/AST/Interp/Context.h @@ -18,7 +18,6 @@ #include "InterpStack.h" #include "clang/AST/APValue.h" -#include "llvm/ADT/PointerIntPair.h" namespace clang { class ASTContext; @@ -69,7 +68,6 @@ class Context final { /// Checks a result from the interpreter. bool Check(State &Parent, llvm::Expected &&R); -private: /// Current compilation context. ASTContext &Ctx; /// Interpreter stack, shared across invocations. From dca7b8e23baa495d500dd1c4c1bbe9fc290530c4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 5 Nov 2022 12:16:56 -0700 Subject: [PATCH 379/516] AMDGPU: Add new test for target intrinsic aliasing metadata The intrinsic used for the test will soon be removed, so move the test to use another one. 
Replaces test added in fa4aac7335ac7ecabbb634d134bd4897783bf62b --- .../CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll | 9 +-------- .../AMDGPU/target-mem-intrinsic-metadata.ll | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll index 1ad00dd639385..23792c6df0bc8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -1,7 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 @@ -19,18 +18,12 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] -; MIR-LABEL: @lds_atomic_inc_ret_i32 -; MIR: DS_INC_RTN_U32 {{.*}} :: (load store (s32) on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3) define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) store 
i32 %result, i32 addrspace(1)* %out ret void } -!0 = !{!1} -!1 = distinct !{!1, !2} -!2 = distinct !{!2} - ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset: ; CIVI-DAG: s_mov_b32 m0 ; GFX9-NOT: m0 diff --git a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll new file mode 100644 index 0000000000000..a68be9c78f728 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll @@ -0,0 +1,20 @@ +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s + +; Make sure !noalias metadata is passed through from target intrinsics + +; MIR-LABEL: name: ds_append_noalias +; MIR: DS_APPEND {{.*}} :: (load store (s32) on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3) +define amdgpu_kernel void @ds_append_noalias() { + %lds = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(1)* null + %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false), !noalias !0 + store i32 %val, i32 addrspace(1)* null, align 4 + ret void +} + +declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #0 + +attributes #0 = { argmemonly convergent nounwind willreturn } + +!0 = !{!1} +!1 = distinct !{!1, !2} +!2 = distinct !{!2} From 162d9030abca31a12c14c4b1051da3143f6865ee Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 5 Nov 2022 12:33:43 -0700 Subject: [PATCH 380/516] GlobalISel: Pass through AA metadata for target memory intrinsics The corresponding change for the DAG was done in fa4aac7335ac7ecabbb634d134bd4897783bf62b --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 3 ++- llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 69fb5bce632e8..7faae09220cc9 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ 
b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2481,7 +2481,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ? getLLTForMVT(Info.memVT.getSimpleVT()) : LLT::scalar(Info.memVT.getStoreSizeInBits()); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, MemTy, Alignment)); + Info.flags, MemTy, Alignment, + CI.getAAMetadata())); } return true; diff --git a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll index a68be9c78f728..9c6c8beed669b 100644 --- a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll @@ -1,4 +1,5 @@ ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s ; Make sure !noalias metadata is passed through from target intrinsics From f72416e974b3d831ffa2672202a782b17b4cd5b4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 5 Nov 2022 11:20:41 -0700 Subject: [PATCH 381/516] AMDGPU: Fix missing divergence tests for csub intrinsics --- .../Analysis/DivergenceAnalysis/AMDGPU/atomics.ll | 11 ++++++++++- .../LegacyDivergenceAnalysis/AMDGPU/atomics.ll | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll index 0833c3516d287..dec529a23107e 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll @@ -41,5 +41,14 @@ declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1 declare i64 
@llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1 +; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) +define amdgpu_kernel void @test_atomic_csub_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) + store i32 %ret, i32 addrspace(1)* %ptr, align 4 + ret void +} + +declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #1 + attributes #0 = { nounwind } -attributes #1 = { nounwind argmemonly } +attributes #1 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll index dfb54c8f97dce..932ac8ede1f55 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll @@ -41,5 +41,14 @@ declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1 declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1 +; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) +define amdgpu_kernel void @test_atomic_csub_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) + store i32 %ret, i32 addrspace(1)* %ptr, align 4 + ret void +} + +declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #1 + attributes #0 = { nounwind } -attributes #1 = { nounwind argmemonly } +attributes #1 = { argmemonly nounwind willreturn } From 541041d1eaa83346532d16785ec0ff8d795be8ba Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 5 Nov 2022 11:23:27 -0700 Subject: [PATCH 382/516] AMDGPU: Fix faulty 
divergence analysis tests These were supposed to be checking that atomics were treated as divergence sources. However, they were using function arguments which are always treated as divergent, so they could have been found divergent for the wrong reason. --- .../Analysis/DivergenceAnalysis/AMDGPU/atomics.ll | 11 +++++++---- .../LegacyDivergenceAnalysis/AMDGPU/atomics.ll | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll index dec529a23107e..12d7fba69f487 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll @@ -1,15 +1,18 @@ ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst -define i32 @test1(i32* %ptr, i32 %val) #0 { +define amdgpu_kernel void @test1(i32* %ptr, i32 %val) #0 { %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst - ret i32 %orig + store i32 %orig, i32* %ptr + ret void } ; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst -define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) { +define amdgpu_kernel void @test2(i32* %ptr, i32 %cmp, i32 %new) { %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst - ret {i32, i1} %orig + %val = extractvalue { i32, i1 } %orig, 0 + store i32 %val, i32* %ptr + ret void } ; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll index 932ac8ede1f55..e6f2385ba130a 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll @@ -1,15 +1,18 @@ ; RUN: opt -mtriple=amdgcn-- 
-amdgpu-use-legacy-divergence-analysis -passes='print' 2>&1 -disable-output %s | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst -define i32 @test1(i32* %ptr, i32 %val) #0 { +define amdgpu_kernel void @test1(i32* %ptr, i32 %val) #0 { %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst - ret i32 %orig + store i32 %orig, i32* %ptr + ret void } ; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst -define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) { +define amdgpu_kernel void @test2(i32* %ptr, i32 %cmp, i32 %new) { %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst - ret {i32, i1} %orig + %val = extractvalue { i32, i1 } %orig, 0 + store i32 %val, i32* %ptr + ret void } ; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) From d4f884c550bec6b195eefb454636adc71449c041 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 7 Nov 2022 07:22:30 +0100 Subject: [PATCH 383/516] [clang][Interp] Add a test case for #58754 This works in the new interpreter but is rejected by the current one. Make sure it keeps working. --- clang/test/AST/Interp/cxx20.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/clang/test/AST/Interp/cxx20.cpp b/clang/test/AST/Interp/cxx20.cpp index 036e7f914bbed..ec273f0713410 100644 --- a/clang/test/AST/Interp/cxx20.cpp +++ b/clang/test/AST/Interp/cxx20.cpp @@ -86,3 +86,27 @@ constexpr int f() { } static_assert(f()); #endif + +/// Distinct literals have distinct addresses. 
+/// see https://github.com/llvm/llvm-project/issues/58754 +constexpr auto foo(const char *p) { return p; } +constexpr auto p1 = "test1"; +constexpr auto p2 = "test2"; + +constexpr bool b1 = foo(p1) == foo(p1); +static_assert(b1); + +constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{declared here}} +static_assert(!b2); // ref-error {{not an integral constant expression}} \ + // ref-note {{not a constant expression}} + +constexpr auto name1() { return "name1"; } +constexpr auto name2() { return "name2"; } + +constexpr auto b3 = name1() == name1(); +static_assert(b3); +constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{declared here}} +static_assert(!b4); // ref-error {{not an integral constant expression}} \ + // ref-note {{not a constant expression}} From 10483ac743e69a6de684593565f586116b506b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 13 Oct 2022 10:09:36 +0200 Subject: [PATCH 384/516] [clang][Interp] Support pointer arithmethic in binary operators Differential Revision: https://reviews.llvm.org/D135858 --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 159 +++++++++++++++-------- clang/lib/AST/Interp/ByteCodeExprGen.h | 1 + clang/lib/AST/Interp/Interp.h | 43 +++++- clang/lib/AST/Interp/Opcodes.td | 6 + clang/lib/AST/Interp/Pointer.cpp | 2 +- clang/test/AST/Interp/arrays.cpp | 55 +++++++- 6 files changed, 204 insertions(+), 62 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 516f77cd3d602..a78758cf2e45d 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -190,67 +190,120 @@ bool ByteCodeExprGen::VisitBinaryOperator(const BinaryOperator *BO) { // Typecheck the args. 
Optional LT = classify(LHS->getType()); Optional RT = classify(RHS->getType()); - if (!LT || !RT) { + Optional T = classify(BO->getType()); + if (!LT || !RT || !T) { return this->bail(BO); } - if (Optional T = classify(BO->getType())) { - if (!visit(LHS)) + auto Discard = [this, T, BO](bool Result) { + if (!Result) return false; - if (!visit(RHS)) + return DiscardResult ? this->emitPop(*T, BO) : true; + }; + + // Pointer arithmetic special case. + if (BO->getOpcode() == BO_Add || BO->getOpcode() == BO_Sub) { + if (*T == PT_Ptr || (*LT == PT_Ptr && *RT == PT_Ptr)) + return this->VisitPointerArithBinOp(BO); + } + + if (!visit(LHS) || !visit(RHS)) + return false; + + switch (BO->getOpcode()) { + case BO_EQ: + return Discard(this->emitEQ(*LT, BO)); + case BO_NE: + return Discard(this->emitNE(*LT, BO)); + case BO_LT: + return Discard(this->emitLT(*LT, BO)); + case BO_LE: + return Discard(this->emitLE(*LT, BO)); + case BO_GT: + return Discard(this->emitGT(*LT, BO)); + case BO_GE: + return Discard(this->emitGE(*LT, BO)); + case BO_Sub: + return Discard(this->emitSub(*T, BO)); + case BO_Add: + return Discard(this->emitAdd(*T, BO)); + case BO_Mul: + return Discard(this->emitMul(*T, BO)); + case BO_Rem: + return Discard(this->emitRem(*T, BO)); + case BO_Div: + return Discard(this->emitDiv(*T, BO)); + case BO_Assign: + if (!this->emitStore(*T, BO)) return false; + return DiscardResult ? this->emitPopPtr(BO) : true; + case BO_And: + return Discard(this->emitBitAnd(*T, BO)); + case BO_Or: + return Discard(this->emitBitOr(*T, BO)); + case BO_Shl: + return Discard(this->emitShl(*LT, *RT, BO)); + case BO_Shr: + return Discard(this->emitShr(*LT, *RT, BO)); + case BO_Xor: + return Discard(this->emitBitXor(*T, BO)); + case BO_LAnd: + case BO_LOr: + default: + return this->bail(BO); + } - auto Discard = [this, T, BO](bool Result) { - if (!Result) - return false; - return DiscardResult ? 
this->emitPop(*T, BO) : true; - }; - - switch (BO->getOpcode()) { - case BO_EQ: - return Discard(this->emitEQ(*LT, BO)); - case BO_NE: - return Discard(this->emitNE(*LT, BO)); - case BO_LT: - return Discard(this->emitLT(*LT, BO)); - case BO_LE: - return Discard(this->emitLE(*LT, BO)); - case BO_GT: - return Discard(this->emitGT(*LT, BO)); - case BO_GE: - return Discard(this->emitGE(*LT, BO)); - case BO_Sub: - return Discard(this->emitSub(*T, BO)); - case BO_Add: - return Discard(this->emitAdd(*T, BO)); - case BO_Mul: - return Discard(this->emitMul(*T, BO)); - case BO_Rem: - return Discard(this->emitRem(*T, BO)); - case BO_Div: - return Discard(this->emitDiv(*T, BO)); - case BO_Assign: - if (!this->emitStore(*T, BO)) - return false; - return DiscardResult ? this->emitPopPtr(BO) : true; - case BO_And: - return Discard(this->emitBitAnd(*T, BO)); - case BO_Or: - return Discard(this->emitBitOr(*T, BO)); - case BO_Shl: - return Discard(this->emitShl(*LT, *RT, BO)); - case BO_Shr: - return Discard(this->emitShr(*LT, *RT, BO)); - case BO_Xor: - return Discard(this->emitBitXor(*T, BO)); - case BO_LAnd: - case BO_LOr: - default: - return this->bail(BO); - } + llvm_unreachable("Unhandled binary op"); +} + +/// Perform addition/subtraction of a pointer and an integer or +/// subtraction of two pointers. 
+template +bool ByteCodeExprGen::VisitPointerArithBinOp(const BinaryOperator *E) { + BinaryOperatorKind Op = E->getOpcode(); + const Expr *LHS = E->getLHS(); + const Expr *RHS = E->getRHS(); + + if ((Op != BO_Add && Op != BO_Sub) || + (!LHS->getType()->isPointerType() && !RHS->getType()->isPointerType())) + return false; + + Optional LT = classify(LHS); + Optional RT = classify(RHS); + + if (!LT || !RT) + return false; + + if (LHS->getType()->isPointerType() && RHS->getType()->isPointerType()) { + if (Op != BO_Sub) + return false; + + assert(E->getType()->isIntegerType()); + if (!visit(RHS) || !visit(LHS)) + return false; + + return this->emitSubPtr(classifyPrim(E->getType()), E); + } + + PrimType OffsetType; + if (LHS->getType()->isIntegerType()) { + if (!visit(RHS) || !visit(LHS)) + return false; + OffsetType = *LT; + } else if (RHS->getType()->isIntegerType()) { + if (!visit(LHS) || !visit(RHS)) + return false; + OffsetType = *RT; + } else { + return false; } - return this->bail(BO); + if (Op == BO_Add) + return this->emitAddOffset(OffsetType, E); + else if (Op == BO_Sub) + return this->emitSubOffset(OffsetType, E); + + return this->bail(E); } template diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index 4c7550aa497df..27261cb130dce 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -64,6 +64,7 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, bool VisitIntegerLiteral(const IntegerLiteral *E); bool VisitParenExpr(const ParenExpr *E); bool VisitBinaryOperator(const BinaryOperator *E); + bool VisitPointerArithBinOp(const BinaryOperator *E); bool VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *E); bool VisitCallExpr(const CallExpr *E); bool VisitCXXMemberCallExpr(const CXXMemberCallExpr *E); diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 23d7b600e1adb..c032d09856215 100644 --- a/clang/lib/AST/Interp/Interp.h +++ 
b/clang/lib/AST/Interp/Interp.h @@ -466,6 +466,16 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { } else { unsigned VL = LHS.getByteOffset(); unsigned VR = RHS.getByteOffset(); + + // In our Pointer class, a pointer to an array and a pointer to the first + // element in the same array are NOT equal. They have the same Base value, + // but a different Offset. This is a pretty rare case, so we fix this here + // by comparing pointers to the first elements. + if (LHS.inArray() && LHS.isRoot()) + VL = LHS.atIndex(0).getByteOffset(); + if (RHS.inArray() && RHS.isRoot()) + VR = RHS.atIndex(0).getByteOffset(); + S.Stk.push(BoolT::from(Fn(Compare(VL, VR)))); return true; } @@ -991,23 +1001,25 @@ template bool OffsetHelper(InterpState &S, CodePtr OpPC) { // Fetch the pointer and the offset. const T &Offset = S.Stk.pop(); const Pointer &Ptr = S.Stk.pop(); - if (!CheckNull(S, OpPC, Ptr, CSK_ArrayIndex)) - return false; + if (!CheckRange(S, OpPC, Ptr, CSK_ArrayToPointer)) return false; - // Get a version of the index comparable to the type. - T Index = T::from(Ptr.getIndex(), Offset.bitWidth()); - // A zero offset does not change the pointer, but in the case of an array - // it has to be adjusted to point to the first element instead of the array. + // A zero offset does not change the pointer. if (Offset.isZero()) { - S.Stk.push(Index.isZero() ? Ptr.atIndex(0) : Ptr); + S.Stk.push(Ptr); return true; } + + if (!CheckNull(S, OpPC, Ptr, CSK_ArrayIndex)) + return false; + // Arrays of unknown bounds cannot have pointers into them. if (!CheckArray(S, OpPC, Ptr)) return false; + // Get a version of the index comparable to the type. + T Index = T::from(Ptr.getIndex(), Offset.bitWidth()); // Compute the largest index into the array. unsigned MaxIndex = Ptr.getNumElems(); @@ -1061,6 +1073,23 @@ bool SubOffset(InterpState &S, CodePtr OpPC) { return OffsetHelper(S, OpPC); } +/// 1) Pops a Pointer from the stack. +/// 2) Pops another Pointer from the stack. 
+/// 3) Pushes the difference of the indices of the two pointers on the stack. +template ::T> +inline bool SubPtr(InterpState &S, CodePtr OpPC) { + const Pointer &LHS = S.Stk.pop(); + const Pointer &RHS = S.Stk.pop(); + + if (!Pointer::hasSameArray(LHS, RHS)) { + // TODO: Diagnose. + return false; + } + + T A = T::from(LHS.getIndex()); + T B = T::from(RHS.getIndex()); + return AddSubMulHelper(S, OpPC, A.bitWidth(), A, B); +} //===----------------------------------------------------------------------===// // Destroy diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index 9f938a6440ae9..ebb0f49bfe59f 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -390,6 +390,12 @@ def AddOffset : AluOpcode; // [Pointer, Integral] -> [Pointer] def SubOffset : AluOpcode; +// [Pointer, Pointer] -> [Integral] +def SubPtr : Opcode { + let Types = [IntegerTypeClass]; + let HasGroup = 1; +} + //===----------------------------------------------------------------------===// // Binary operators. 
//===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp index c7d9c3a5cd11b..b849acb52f0c1 100644 --- a/clang/lib/AST/Interp/Pointer.cpp +++ b/clang/lib/AST/Interp/Pointer.cpp @@ -202,5 +202,5 @@ bool Pointer::hasSameBase(const Pointer &A, const Pointer &B) { } bool Pointer::hasSameArray(const Pointer &A, const Pointer &B) { - return A.Base == B.Base && A.getFieldDesc()->IsArray; + return hasSameBase(A, B) && A.Base == B.Base && A.getFieldDesc()->IsArray; } diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp index a13bce6fb27d4..bba54ee2b2fa1 100644 --- a/clang/test/AST/Interp/arrays.cpp +++ b/clang/test/AST/Interp/arrays.cpp @@ -37,6 +37,60 @@ constexpr int getElement(int i) { static_assert(getElement(1) == 4, ""); static_assert(getElement(5) == 36, ""); +constexpr int data[] = {5, 4, 3, 2, 1}; +constexpr int getElement(const int *Arr, int index) { + return *(Arr + index); +} + +static_assert(getElement(data, 1) == 4, ""); +static_assert(getElement(data, 4) == 1, ""); + +constexpr int getElementFromEnd(const int *Arr, int size, int index) { + return *(Arr + size - index - 1); +} +static_assert(getElementFromEnd(data, 5, 0) == 1, ""); +static_assert(getElementFromEnd(data, 5, 4) == 5, ""); + + +constexpr static int arr[2] = {1,2}; +constexpr static int arr2[2] = {3,4}; +constexpr int *p1 = nullptr; +constexpr int *p2 = p1 + 1; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot perform pointer arithmetic on null pointer}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot perform pointer arithmetic on null pointer}} +constexpr int *p3 = p1 + 0; +constexpr int *p4 = p1 - 0; +constexpr int *p5 = 0 + p1; +constexpr int *p6 = 0 - p1; // expected-error {{invalid operands to binary expression}} \ + // ref-error {{invalid operands to binary expression}} + 
+constexpr int const * ap1 = &arr[0]; +constexpr int const * ap2 = ap1 + 3; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot refer to element 3 of array of 2}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot refer to element 3 of array of 2}} + +constexpr auto ap3 = arr - 1; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot refer to element -1}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot refer to element -1}} +constexpr int k1 = &arr[1] - &arr[0]; +static_assert(k1 == 1, ""); +static_assert((&arr[0] - &arr[1]) == -1, ""); + +constexpr int k2 = &arr2[1] - &arr[0]; // expected-error {{must be initialized by a constant expression}} \ + // ref-error {{must be initialized by a constant expression}} + +static_assert((arr + 0) == arr, ""); +static_assert(&arr[0] == arr, ""); +static_assert(*(&arr[0]) == 1, ""); +static_assert(*(&arr[1]) == 2, ""); + +constexpr const int *OOB = (arr + 3) - 3; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot refer to element 3 of array of 2}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot refer to element 3 of array of 2}} template constexpr T getElementOf(T* array, int i) { @@ -52,7 +106,6 @@ constexpr T& getElementOfArray(T (&array)[N], int I) { static_assert(getElementOfArray(foo[2], 3) == &m, ""); -constexpr int data[] = {5, 4, 3, 2, 1}; static_assert(data[0] == 4, ""); // expected-error{{failed}} \ // expected-note{{5 == 4}} \ // ref-error{{failed}} \ From 7c0a2d9cda996a04c9eb55244a0ebf57545de849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Fri, 21 Oct 2022 12:26:05 +0200 Subject: [PATCH 385/516] [clang][Interp][NFC] Use StorePop for assignments with DiscardResult If we don't need the result anyway, use StorePop, instead of a Store+Pop 
combination. That way we save one instruction and not using the result is the common case anyway. --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index a78758cf2e45d..24b5160eafbc4 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -234,9 +234,9 @@ bool ByteCodeExprGen::VisitBinaryOperator(const BinaryOperator *BO) { case BO_Div: return Discard(this->emitDiv(*T, BO)); case BO_Assign: - if (!this->emitStore(*T, BO)) - return false; - return DiscardResult ? this->emitPopPtr(BO) : true; + if (DiscardResult) + return this->emitStorePop(*T, BO); + return this->emitStore(*T, BO); case BO_And: return Discard(this->emitBitAnd(*T, BO)); case BO_Or: From 6e557e28ecf41353f207bc93d1a718ec508a68ff Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Mon, 7 Nov 2022 02:03:48 -0500 Subject: [PATCH 386/516] [PowerPC][NFC] use script to generate check lines --- llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll | 207 +++++++++++++++++++-- 1 file changed, 193 insertions(+), 14 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll index de0441f279971..3c9f39ff76682 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll @@ -1,11 +1,91 @@ -; RUN: llc < %s -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -; RUN: llc < %s -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR9 +; RUN: llc < %s -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR8 @a = internal global fp128 
0xL00000000000000000000000000000000, align 16 @x = internal global [4 x fp128] zeroinitializer, align 16 @y = internal global [4 x fp128] zeroinitializer, align 16 -define void @fmul_ctrloop_fp128() { +define void @fmul_ctrloop_fp128() nounwind { +; PWR9-LABEL: fmul_ctrloop_fp128: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT: addis 5, 2, a@toc@ha +; PWR9-NEXT: addis 3, 2, y@toc@ha +; PWR9-NEXT: addis 4, 2, x@toc@ha +; PWR9-NEXT: addi 5, 5, a@toc@l +; PWR9-NEXT: addi 3, 3, y@toc@l +; PWR9-NEXT: addi 4, 4, x@toc@l +; PWR9-NEXT: lxv 34, 0(5) +; PWR9-NEXT: addi 3, 3, -16 +; PWR9-NEXT: addi 4, 4, -16 +; PWR9-NEXT: li 5, 0 +; PWR9-NEXT: .p2align 5 +; PWR9-NEXT: .LBB0_1: # %for.body +; PWR9-NEXT: # +; PWR9-NEXT: lxv 35, 16(4) +; PWR9-NEXT: addi 5, 5, 16 +; PWR9-NEXT: addi 4, 4, 16 +; PWR9-NEXT: cmpldi 5, 64 +; PWR9-NEXT: xsmulqp 3, 2, 3 +; PWR9-NEXT: stxv 35, 16(3) +; PWR9-NEXT: addi 3, 3, 16 +; PWR9-NEXT: bne 0, .LBB0_1 +; PWR9-NEXT: # %bb.2: # %for.end +; PWR9-NEXT: blr +; +; PWR8-LABEL: fmul_ctrloop_fp128: +; PWR8: # %bb.0: # %entry +; PWR8-NEXT: mflr 0 +; PWR8-NEXT: std 0, 16(1) +; PWR8-NEXT: stdu 1, -112(1) +; PWR8-NEXT: li 3, 48 +; PWR8-NEXT: addis 4, 2, x@toc@ha +; PWR8-NEXT: std 28, 80(1) # 8-byte Folded Spill +; PWR8-NEXT: std 29, 88(1) # 8-byte Folded Spill +; PWR8-NEXT: std 30, 96(1) # 8-byte Folded Spill +; PWR8-NEXT: li 30, 0 +; PWR8-NEXT: li 29, 16 +; PWR8-NEXT: addi 4, 4, x@toc@l +; PWR8-NEXT: std 26, 64(1) # 8-byte Folded Spill +; PWR8-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PWR8-NEXT: addis 3, 2, a@toc@ha +; PWR8-NEXT: std 27, 72(1) # 8-byte Folded Spill +; PWR8-NEXT: addi 3, 3, a@toc@l +; PWR8-NEXT: lxvd2x 0, 0, 3 +; PWR8-NEXT: addis 3, 2, y@toc@ha +; PWR8-NEXT: addi 3, 3, y@toc@l +; PWR8-NEXT: addi 28, 3, -16 +; PWR8-NEXT: addi 3, 4, -16 +; PWR8-NEXT: xxswapd 63, 0 +; PWR8-NEXT: .p2align 4 +; PWR8-NEXT: .LBB0_1: # %for.body +; PWR8-NEXT: # +; PWR8-NEXT: lxvd2x 0, 3, 29 +; PWR8-NEXT: vmr 2, 31 +; PWR8-NEXT: addi 27, 28, 16 +; PWR8-NEXT: addi 26, 3, 
16 +; PWR8-NEXT: xxswapd 35, 0 +; PWR8-NEXT: bl __mulkf3 +; PWR8-NEXT: nop +; PWR8-NEXT: xxswapd 0, 34 +; PWR8-NEXT: addi 30, 30, 16 +; PWR8-NEXT: mr 3, 26 +; PWR8-NEXT: cmpldi 30, 64 +; PWR8-NEXT: stxvd2x 0, 28, 29 +; PWR8-NEXT: mr 28, 27 +; PWR8-NEXT: bne 0, .LBB0_1 +; PWR8-NEXT: # %bb.2: # %for.end +; PWR8-NEXT: li 3, 48 +; PWR8-NEXT: ld 30, 96(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 29, 88(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 28, 80(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 27, 72(1) # 8-byte Folded Reload +; PWR8-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PWR8-NEXT: ld 26, 64(1) # 8-byte Folded Reload +; PWR8-NEXT: addi 1, 1, 112 +; PWR8-NEXT: ld 0, 16(1) +; PWR8-NEXT: mtlr 0 +; PWR8-NEXT: blr entry: %0 = load fp128, ptr @a, align 16 br label %for.body @@ -23,12 +103,63 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void - -; CHECK-LABEL: fmul_ctrloop_fp128 -; CHECK-NOT: mtctr } -define void @fpext_ctrloop_fp128(ptr %a) { +define void @fpext_ctrloop_fp128(ptr %a) nounwind { +; PWR9-LABEL: fpext_ctrloop_fp128: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT: addis 4, 2, y@toc@ha +; PWR9-NEXT: addi 3, 3, -8 +; PWR9-NEXT: addi 4, 4, y@toc@l +; PWR9-NEXT: addi 5, 4, -16 +; PWR9-NEXT: li 4, 0 +; PWR9-NEXT: .p2align 5 +; PWR9-NEXT: .LBB1_1: # %for.body +; PWR9-NEXT: # +; PWR9-NEXT: lfdu 0, 8(3) +; PWR9-NEXT: addi 4, 4, 8 +; PWR9-NEXT: cmpldi 4, 32 +; PWR9-NEXT: xscpsgndp 34, 0, 0 +; PWR9-NEXT: xscvdpqp 2, 2 +; PWR9-NEXT: stxv 34, 16(5) +; PWR9-NEXT: addi 5, 5, 16 +; PWR9-NEXT: bne 0, .LBB1_1 +; PWR9-NEXT: # %bb.2: # %for.end +; PWR9-NEXT: blr +; +; PWR8-LABEL: fpext_ctrloop_fp128: +; PWR8: # %bb.0: # %entry +; PWR8-NEXT: mflr 0 +; PWR8-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PWR8-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; PWR8-NEXT: std 0, 16(1) +; PWR8-NEXT: stdu 1, -64(1) +; PWR8-NEXT: addis 4, 2, y@toc@ha +; PWR8-NEXT: addi 30, 3, -8 +; PWR8-NEXT: li 28, 0 +; PWR8-NEXT: 
addi 4, 4, y@toc@l +; PWR8-NEXT: addi 29, 4, -16 +; PWR8-NEXT: .p2align 4 +; PWR8-NEXT: .LBB1_1: # %for.body +; PWR8-NEXT: # +; PWR8-NEXT: lfdu 1, 8(30) +; PWR8-NEXT: addi 29, 29, 16 +; PWR8-NEXT: bl __extenddfkf2 +; PWR8-NEXT: nop +; PWR8-NEXT: xxswapd 0, 34 +; PWR8-NEXT: addi 28, 28, 8 +; PWR8-NEXT: cmpldi 28, 32 +; PWR8-NEXT: stxvd2x 0, 0, 29 +; PWR8-NEXT: bne 0, .LBB1_1 +; PWR8-NEXT: # %bb.2: # %for.end +; PWR8-NEXT: addi 1, 1, 64 +; PWR8-NEXT: ld 0, 16(1) +; PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; PWR8-NEXT: mtlr 0 +; PWR8-NEXT: blr entry: br label %for.body @@ -45,12 +176,63 @@ for.body: for.end: ret void - -; CHECK-LABEL: fpext_ctrloop_fp128 -; CHECK-NOT: mtctr } -define void @fptrunc_ctrloop_fp128(ptr %a) { +define void @fptrunc_ctrloop_fp128(ptr %a) nounwind { +; PWR9-LABEL: fptrunc_ctrloop_fp128: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT: addis 4, 2, x@toc@ha +; PWR9-NEXT: addi 3, 3, -8 +; PWR9-NEXT: li 5, 0 +; PWR9-NEXT: addi 4, 4, x@toc@l +; PWR9-NEXT: addi 4, 4, -16 +; PWR9-NEXT: .p2align 5 +; PWR9-NEXT: .LBB2_1: # %for.body +; PWR9-NEXT: # +; PWR9-NEXT: lxv 34, 16(4) +; PWR9-NEXT: addi 5, 5, 8 +; PWR9-NEXT: addi 4, 4, 16 +; PWR9-NEXT: cmpldi 5, 32 +; PWR9-NEXT: xscvqpdp 2, 2 +; PWR9-NEXT: xscpsgndp 0, 34, 34 +; PWR9-NEXT: stfdu 0, 8(3) +; PWR9-NEXT: bne 0, .LBB2_1 +; PWR9-NEXT: # %bb.2: # %for.end +; PWR9-NEXT: blr +; +; PWR8-LABEL: fptrunc_ctrloop_fp128: +; PWR8: # %bb.0: # %entry +; PWR8-NEXT: mflr 0 +; PWR8-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PWR8-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; PWR8-NEXT: std 0, 16(1) +; PWR8-NEXT: stdu 1, -64(1) +; PWR8-NEXT: addis 4, 2, x@toc@ha +; PWR8-NEXT: addi 30, 3, -8 +; PWR8-NEXT: li 28, 0 +; PWR8-NEXT: addi 4, 4, x@toc@l +; PWR8-NEXT: addi 29, 4, -16 +; PWR8-NEXT: .p2align 4 +; PWR8-NEXT: .LBB2_1: # %for.body +; PWR8-NEXT: # +; PWR8-NEXT: 
addi 29, 29, 16 +; PWR8-NEXT: lxvd2x 0, 0, 29 +; PWR8-NEXT: xxswapd 34, 0 +; PWR8-NEXT: bl __trunckfdf2 +; PWR8-NEXT: nop +; PWR8-NEXT: addi 28, 28, 8 +; PWR8-NEXT: stfdu 1, 8(30) +; PWR8-NEXT: cmpldi 28, 32 +; PWR8-NEXT: bne 0, .LBB2_1 +; PWR8-NEXT: # %bb.2: # %for.end +; PWR8-NEXT: addi 1, 1, 64 +; PWR8-NEXT: ld 0, 16(1) +; PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; PWR8-NEXT: mtlr 0 +; PWR8-NEXT: blr entry: br label %for.body @@ -67,9 +249,6 @@ for.body: for.end: ret void - -; CHECK-LABEL: fptrunc_ctrloop_fp128 -; CHECK-NOT: mtctr } declare void @obfuscate(ptr, ...) local_unnamed_addr #2 From 9a3b969d1faa77d4c629ddb797d317579fbe0555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Fri, 21 Oct 2022 15:43:42 +0200 Subject: [PATCH 387/516] [clang][Interp][NFC] Make InitField() not pop the pointer This was confusing. InitElem peeks a pointer, while InitElemPop will pop the pointer. However, for fields, InitField would pop the pointer and no InitFieldPop exists. At least make InitField and InitElem behave the same. --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 3 +++ clang/lib/AST/Interp/ByteCodeStmtGen.cpp | 3 +++ clang/lib/AST/Interp/Interp.h | 4 ++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 24b5160eafbc4..5362e9cb1ab0a 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -936,6 +936,9 @@ bool ByteCodeExprGen::visitRecordInitializer(const Expr *Initializer) { if (!this->emitInitField(*T, FieldToInit->Offset, Initializer)) return false; + + if (!this->emitPopPtr(Initializer)) + return false; } else { // Non-primitive case. Get a pointer to the field-to-initialize // on the stack and recurse into visitInitializer(). 
diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp index bbe4d04c8974b..81243d846bc1f 100644 --- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp @@ -114,6 +114,9 @@ bool ByteCodeStmtGen::visitFunc(const FunctionDecl *F) { if (!this->emitInitField(*T, F->Offset, InitExpr)) return false; + + if (!this->emitPopPtr(InitExpr)) + return false; } else { // Non-primitive case. Get a pointer to the field-to-initialize // on the stack and call visitInitialzer() for it. diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index c032d09856215..b7666745efd4a 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -735,12 +735,12 @@ bool InitThisFieldActive(InterpState &S, CodePtr OpPC, uint32_t I) { } /// 1) Pops the value from the stack -/// 2) Pops a pointer from the stack +/// 2) Peeks a pointer from the stack /// 3) Pushes the value to field I of the pointer on the stack template ::T> bool InitField(InterpState &S, CodePtr OpPC, uint32_t I) { const T &Value = S.Stk.pop(); - const Pointer &Field = S.Stk.pop().atField(I); + const Pointer &Field = S.Stk.peek().atField(I); Field.deref() = Value; Field.activate(); Field.initialize(); From 70de684d44135b4025d92b2b36ad387cf5ab8b5a Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Mon, 7 Nov 2022 08:34:40 +0100 Subject: [PATCH 388/516] [clang-format] Handle object instantiation in if-statements Before this patch, code like this: ``` if (Class* obj{getObject()}) { } ``` would be mis-formatted since the * would be annotated as a BinaryOperator. This patch changes the * to become a PointerOrReference instead and fixes the formatting issues. 
Reviewed By: HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D137327 --- clang/lib/Format/TokenAnnotator.cpp | 9 ++++++++- clang/unittests/Format/TokenAnnotatorTest.cpp | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3d76cc171b0dc..dbfe88c531322 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -362,7 +362,8 @@ class AnnotatingParser { FormatToken *Next = CurrentToken->Next; if (PrevPrev && PrevPrev->is(tok::identifier) && Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && - CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { + CurrentToken->is(tok::identifier) && + !Next->isOneOf(tok::equal, tok::l_brace)) { Prev->setType(TT_BinaryOperator); LookForDecls = false; } @@ -2387,6 +2388,12 @@ class AnnotatingParser { return TT_PointerOrReference; } + // if (Class* obj { function() }) + if (PrevToken->Tok.isAnyIdentifier() && NextToken->Tok.isAnyIdentifier() && + NextToken->Next && NextToken->Next->is(tok::l_brace)) { + return TT_PointerOrReference; + } + if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete)) return TT_UnaryOperator; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index fa95f6845f077..65ecb12c46cd7 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -145,6 +145,18 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) { EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_FunctionTypeLParen); EXPECT_TOKEN(Tokens[7], tok::star, TT_UnaryOperator); EXPECT_TOKEN(Tokens[12], tok::star, TT_PointerOrReference); + + Tokens = annotate("if (Foo * Bar / Test)"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::star, TT_BinaryOperator); + + Tokens = annotate("if (Class* obj {getObj()})"); + ASSERT_EQ(Tokens.size(), 12u) << 
Tokens; + EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference); + + Tokens = annotate("if (Foo* Bar = getObj())"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference); } TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) { From 05a113e18852ab54f22516e9ec1b6aa39adc5b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Sat, 22 Oct 2022 09:27:21 +0200 Subject: [PATCH 389/516] [clang][Interp][NFC] Handle discarded ArraySubscriptExprs This is not exactly a common case, so just pop the pointer at the end if necessary. --- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 5362e9cb1ab0a..f3f4ae7748861 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -319,26 +319,26 @@ bool ByteCodeExprGen::VisitArraySubscriptExpr( const ArraySubscriptExpr *E) { const Expr *Base = E->getBase(); const Expr *Index = E->getIdx(); + PrimType IndexT = classifyPrim(Index->getType()); // Take pointer of LHS, add offset from RHS, narrow result. // What's left on the stack after this is a pointer. 
- if (Optional IndexT = classify(Index->getType())) { - if (!this->visit(Base)) - return false; + if (!this->visit(Base)) + return false; - if (!this->visit(Index)) - return false; + if (!this->visit(Index)) + return false; - if (!this->emitAddOffset(*IndexT, E)) - return false; + if (!this->emitAddOffset(IndexT, E)) + return false; - if (!this->emitNarrowPtr(E)) - return false; + if (!this->emitNarrowPtr(E)) + return false; - return true; - } + if (DiscardResult) + return this->emitPopPtr(E); - return false; + return true; } template From c40ef64462eeac9bafc28512dfaae55475016676 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Mon, 7 Nov 2022 14:37:54 +0700 Subject: [PATCH 390/516] [Greedy RegAlloc] Add a test for single block split with statepoint uses. The test shows redundant usage of callee saved register in case basic block has only one non-statepoint use. --- .../X86/statepoint-split-single-block.ll | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 llvm/test/CodeGen/X86/statepoint-split-single-block.ll diff --git a/llvm/test/CodeGen/X86/statepoint-split-single-block.ll b/llvm/test/CodeGen/X86/statepoint-split-single-block.ll new file mode 100644 index 0000000000000..61368d3101ee6 --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-split-single-block.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -max-registers-for-gc-values=256 -use-registers-for-deopt-values=true -code-model=large -fixup-allow-gcptr-in-csr=true < %s | FileCheck %s + +; The test checks that Greedy register allocator should not split single basic block +; if it has only one non-statepoint use. Otherwise we may get redundant register usage. 
+ +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-win64" + +define ptr addrspace(1) @foo(ptr addrspace(1) %arg) gc "statepoint-example" { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rdi, (%rsp) # 8-byte Spill +; CHECK-NEXT: movabsq $nocsr, %rax +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: movabsq $bar, %rax +; CHECK-NEXT: movq (%rsp), %rbx # 8-byte Reload +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: movq %rbx, (%rsp) # 8-byte Spill +; CHECK-NEXT: movabsq $nocsr, %rax +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: movq (%rsp), %rax # 8-byte Reload +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + ; force spill %arg on stack. 
+ call void @nocsr() + br label %do_call + +do_call: + ; Basic block with two use instructions inside: + ; copy to rdi as an argument to call and use in statepoint instruction as gc-live and deopt value. + %statepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void (ptr addrspace(1))) @bar, i32 1, i32 0, ptr addrspace(1) %arg, i32 0, i32 0) [ "deopt"(ptr addrspace(1) %arg), "gc-live"(ptr addrspace(1) %arg) ] + %arg.reloc = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %statepoint_token, i32 0, i32 0) + br label %next + +next: + ; force spill %arg.reloc on stack. + call void @nocsr() + ret ptr addrspace(1) %arg.reloc +} + +declare void @nocsr() "no_callee_saved_registers" +declare void @bar(ptr addrspace(1)) +declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...) +declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32 immarg, i32 immarg) From f4707af2944015732b1f35e1bc66cc62f489cef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Sun, 30 Oct 2022 10:05:42 +0100 Subject: [PATCH 391/516] [clang][Interp][NFCI] Cleanup emitConst() Before, when emitting a regular integer constant, we went: Int -> APInt -> int -> emit Fix this by using regular integer constants in emitConst() and instead converting APInt to those once. 
--- clang/lib/AST/Interp/ByteCodeExprGen.cpp | 51 ++++++++++++------------ clang/lib/AST/Interp/ByteCodeExprGen.h | 14 +++---- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index f3f4ae7748861..91b9809861d8b 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -160,9 +160,7 @@ bool ByteCodeExprGen::VisitIntegerLiteral(const IntegerLiteral *LE) { if (DiscardResult) return true; - if (Optional T = classify(LE->getType())) - return emitConst(*T, LE->getValue(), LE); - return this->bail(LE); + return this->emitConst(LE->getValue(), LE); } template @@ -380,7 +378,7 @@ bool ByteCodeExprGen::VisitUnaryExprOrTypeTraitExpr( Size = Ctx.getASTContext().getTypeSizeInChars(ArgType); } - return this->emitConst(E, Size.getQuantity()); + return this->emitConst(Size.getQuantity(), E); } return false; @@ -420,9 +418,7 @@ bool ByteCodeExprGen::VisitArrayInitIndexExpr( // stand-alone, e.g. via EvaluateAsInt(). 
if (!ArrayIndex) return false; - QualType IndexType = E->getType(); - APInt Value(getIntWidth(IndexType), *ArrayIndex); - return this->emitConst(classifyPrim(IndexType), Value, E); + return this->emitConst(*ArrayIndex, E); } template @@ -470,7 +466,7 @@ bool ByteCodeExprGen::VisitStringLiteral(const StringLiteral *E) { template bool ByteCodeExprGen::VisitCharacterLiteral( const CharacterLiteral *E) { - return this->emitConst(E, E->getValue()); + return this->emitConst(E->getValue(), E); } template @@ -715,27 +711,27 @@ bool ByteCodeExprGen::dereferenceVar( } template -bool ByteCodeExprGen::emitConst(PrimType T, const APInt &Value, - const Expr *E) { - switch (T) { +template +bool ByteCodeExprGen::emitConst(T Value, const Expr *E) { + switch (classifyPrim(E->getType())) { case PT_Sint8: - return this->emitConstSint8(Value.getSExtValue(), E); + return this->emitConstSint8(Value, E); case PT_Uint8: - return this->emitConstUint8(Value.getZExtValue(), E); + return this->emitConstUint8(Value, E); case PT_Sint16: - return this->emitConstSint16(Value.getSExtValue(), E); + return this->emitConstSint16(Value, E); case PT_Uint16: - return this->emitConstUint16(Value.getZExtValue(), E); + return this->emitConstUint16(Value, E); case PT_Sint32: - return this->emitConstSint32(Value.getSExtValue(), E); + return this->emitConstSint32(Value, E); case PT_Uint32: - return this->emitConstUint32(Value.getZExtValue(), E); + return this->emitConstUint32(Value, E); case PT_Sint64: - return this->emitConstSint64(Value.getSExtValue(), E); + return this->emitConstSint64(Value, E); case PT_Uint64: - return this->emitConstUint64(Value.getZExtValue(), E); + return this->emitConstUint64(Value, E); case PT_Bool: - return this->emitConstBool(Value.getBoolValue(), E); + return this->emitConstBool(Value, E); case PT_Ptr: llvm_unreachable("Invalid integral type"); break; @@ -743,6 +739,13 @@ bool ByteCodeExprGen::emitConst(PrimType T, const APInt &Value, llvm_unreachable("unknown primitive type"); } 
+template +bool ByteCodeExprGen::emitConst(const APSInt &Value, const Expr *E) { + if (Value.isSigned()) + return this->emitConst(Value.getSExtValue(), E); + return this->emitConst(Value.getZExtValue(), E); +} + template unsigned ByteCodeExprGen::allocateLocalPrimitive(DeclTy &&Src, PrimType Ty, @@ -1198,7 +1201,7 @@ bool ByteCodeExprGen::VisitUnaryOperator(const UnaryOperator *E) { return this->emitIncPop(*T, E); this->emitLoad(*T, E); - this->emitConst(E, 1); + this->emitConst(1, E); this->emitAdd(*T, E); return this->emitStore(*T, E); } @@ -1211,7 +1214,7 @@ bool ByteCodeExprGen::VisitUnaryOperator(const UnaryOperator *E) { return this->emitDecPop(*T, E); this->emitLoad(*T, E); - this->emitConst(E, 1); + this->emitConst(1, E); this->emitSub(*T, E); return this->emitStore(*T, E); } @@ -1284,9 +1287,7 @@ bool ByteCodeExprGen::VisitDeclRefExpr(const DeclRefExpr *E) { return this->emitGetPtrParam(It->second, E); } } else if (const auto *ECD = dyn_cast(Decl)) { - PrimType T = *classify(ECD->getType()); - - return this->emitConst(T, ECD->getInitVal(), E); + return this->emitConst(ECD->getInitVal(), E); } return false; diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index 27261cb130dce..9b53065945173 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -229,16 +229,14 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, DerefKind AK, llvm::function_ref Direct, llvm::function_ref Indirect); - /// Emits an APInt constant. - bool emitConst(PrimType T, const llvm::APInt &Value, const Expr *E); + /// Emits an APSInt constant. + bool emitConst(const APSInt &Value, const Expr *E); + bool emitConst(const APInt &Value, const Expr *E) { + return emitConst(static_cast(Value), E); + } /// Emits an integer constant. 
- template bool emitConst(const Expr *E, T Value) { - QualType Ty = E->getType(); - APInt WrappedValue(getIntWidth(Ty), static_cast(Value), - std::is_signed::value); - return emitConst(*Ctx.classify(Ty), WrappedValue, E); - } + template bool emitConst(T Value, const Expr *E); /// Emits the initialized pointer. bool emitInitFn() { From abe2738b7b02f93941b9e193e637e99d4e3c1581 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Mon, 7 Nov 2022 09:23:07 +0100 Subject: [PATCH 392/516] [mlir] Apply two ClangTidy findings. - argument name 'isLastOutput' in comment does not match parameter name 'hasOutput'. - override is redundant since the function is already declared 'final'. --- mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp | 2 +- mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 11e9a649984cf..533d31fdb5536 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -56,7 +56,7 @@ struct CodeGen { CodeGen(SparsificationOptions o, ValueRange tensors, unsigned numTensors, unsigned numLoops, OpOperand *op, unsigned nest, std::vector &ts) - : options(o), loopEmitter(tensors, /*isLastOutput=*/true, + : options(o), loopEmitter(tensors, /*hasOutput=*/true, /*isSparseOut=*/op != nullptr), sparseOut(op), outerParNest(nest), topSort(ts) { if (op) diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp index ca285465648ff..aa79e54b9b306 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp @@ -46,9 +46,8 @@ struct MaskOpRewritePattern : OpRewritePattern { using OpRewritePattern::OpRewritePattern; private: - LogicalResult - 
matchAndRewrite(MaskOp maskOp, - PatternRewriter &rewriter) const override final { + LogicalResult matchAndRewrite(MaskOp maskOp, + PatternRewriter &rewriter) const final { MaskableOpInterface maskableOp = maskOp.getMaskableOp(); SourceOp sourceOp = dyn_cast(maskableOp.getOperation()); if (!sourceOp) From 41fa7d2093e0b7ff5f729b318643e8e177778feb Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Mon, 31 Oct 2022 03:22:29 -0400 Subject: [PATCH 393/516] [clangd] Fix a small inconsistency in system-include-extractor.test Also add an explanatory comment Differential Revision: https://reviews.llvm.org/D137056 --- clang-tools-extra/clangd/test/system-include-extractor.test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/test/system-include-extractor.test b/clang-tools-extra/clangd/test/system-include-extractor.test index c861a2346470e..b109aa67aad1c 100644 --- a/clang-tools-extra/clangd/test/system-include-extractor.test +++ b/clang-tools-extra/clangd/test/system-include-extractor.test @@ -11,9 +11,11 @@ # RUN: echo '#!/bin/sh' >> %t.dir/bin/my_driver.sh # RUN: echo '[ "$0" = "%t.dir/bin/my_driver.sh" ] || exit' >> %t.dir/bin/my_driver.sh # RUN: echo 'args="$*"' >> %t.dir/bin/my_driver.sh +# Check that clangd preserves certain flags like `-nostdinc` from +# original invocation in compile_commands.json. 
# RUN: echo '[ -z "${args##*"-nostdinc"*}" ] || exit' >> %t.dir/bin/my_driver.sh # RUN: echo '[ -z "${args##*"-isysroot=/isysroot"*}" ] || exit' >> %t.dir/bin/my_driver.sh -# RUN: echo 'echo " $* " | grep " --sysroot /my/sysroot/path " || exit' >> %t.dir/bin/my_driver.sh +# RUN: echo '[ -z "${args##*"--sysroot /my/sysroot/path"*}" ] || exit' >> %t.dir/bin/my_driver.sh # RUN: echo 'echo line to ignore >&2' >> %t.dir/bin/my_driver.sh # RUN: echo 'printf "Target: arm-linux-gnueabihf\r\n" >&2' >> %t.dir/bin/my_driver.sh # RUN: echo 'printf "#include <...> search starts here:\r\n" >&2' >> %t.dir/bin/my_driver.sh From c8341a66159703de242ab8de362b59548cdda71e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 31 Oct 2022 12:58:27 +0100 Subject: [PATCH 394/516] [clang][Interp][NFC] Avoid a getSource() call in the common case In the common (successful) case, we don't need the getSource() call, so move it to the two if statement bodies instead. --- clang/lib/AST/Interp/Interp.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index a5984a21efb19..b22756a803459 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -201,8 +201,8 @@ bool CheckArray(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { - const auto &Src = S.Current->getSource(OpPC); if (Ptr.isZero()) { + const auto &Src = S.Current->getSource(OpPC); if (Ptr.isField()) S.FFDiag(Src, diag::note_constexpr_null_subobject) << CSK_Field; @@ -213,6 +213,7 @@ bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, } if (!Ptr.isLive()) { + const auto &Src = S.Current->getSource(OpPC); bool IsTemp = Ptr.isTemporary(); S.FFDiag(Src, diag::note_constexpr_lifetime_ended, 1) << AK << !IsTemp; From 6b3e5c595b6930ae87aef6d75377663d238c0921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= 
Date: Sun, 30 Oct 2022 10:22:44 +0100 Subject: [PATCH 395/516] [clang][Interp][NFC] Remove unused function --- clang/lib/AST/Interp/Interp.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index b7666745efd4a..a90e1246311d8 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -112,8 +112,6 @@ bool CheckDivRem(InterpState &S, CodePtr OpPC, const T &LHS, const T &RHS) { return true; } -template inline bool IsTrue(const T &V) { return !V.isZero(); } - /// Interpreter entry point. bool Interpret(InterpState &S, APValue &Result); From 5bd6bd12276ff5e5c38002cf607976e8ac9ed8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 31 Oct 2022 08:33:30 +0100 Subject: [PATCH 396/516] [clang][Interp][NFC] Simplify visitReturnStmt() --- clang/lib/AST/Interp/ByteCodeStmtGen.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp index 81243d846bc1f..a6aa8d88622a0 100644 --- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp @@ -237,12 +237,11 @@ bool ByteCodeStmtGen::visitReturnStmt(const ReturnStmt *RS) { this->emitCleanup(); return this->emitRetVoid(RS); } - } else { - this->emitCleanup(); - if (!this->emitRetVoid(RS)) - return false; - return true; } + + // Void return. + this->emitCleanup(); + return this->emitRetVoid(RS); } template From 5dfacb12452026eb9492d046e816fafa0ff5915c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 31 Oct 2022 09:08:20 +0100 Subject: [PATCH 397/516] [clang][Interp][NFC] Replace dyn_cast_or_null with _if_present ... 
in Descriptor.h --- clang/lib/AST/Interp/Descriptor.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h index dacec6be89c74..b2f50815fe848 100644 --- a/clang/lib/AST/Interp/Descriptor.h +++ b/clang/lib/AST/Interp/Descriptor.h @@ -113,15 +113,15 @@ struct Descriptor final { const Expr *asExpr() const { return Source.dyn_cast(); } const ValueDecl *asValueDecl() const { - return dyn_cast_or_null(asDecl()); + return dyn_cast_if_present(asDecl()); } const FieldDecl *asFieldDecl() const { - return dyn_cast_or_null(asDecl()); + return dyn_cast_if_present(asDecl()); } const RecordDecl *asRecordDecl() const { - return dyn_cast_or_null(asDecl()); + return dyn_cast_if_present(asDecl()); } /// Returns the size of the object without metadata. From 9e885d9aaba140a583f3bd2f38db73c123b60ef9 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 7 Nov 2022 09:04:06 +0000 Subject: [PATCH 398/516] [InstSimplify] Add tests for (~A & B) | ~(A | B) --> ~A with logical And. 
NFC --- llvm/test/Transforms/InstCombine/or.ll | 21 ++++++++++ llvm/test/Transforms/InstSimplify/or.ll | 54 +++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 38409394654f4..fa10eefb87987 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -1561,3 +1561,24 @@ define i32 @mul_common_bits(i32 %p) { %r = or i32 %m, %x ret i32 %r } + +define <4 x i1> @and_or_not_or_logical_vec(<4 x i32> %ap, <4 x i32> %bp) { +; CHECK-LABEL: @and_or_not_or_logical_vec( +; CHECK-NEXT: [[A:%.*]] = icmp eq <4 x i32> [[AP:%.*]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = icmp eq <4 x i32> [[BP:%.*]], zeroinitializer +; CHECK-NEXT: [[V:%.*]] = xor <4 x i1> [[A]], +; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[B]], <4 x i1> [[V]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[W:%.*]] = or <4 x i1> [[B]], [[A]] +; CHECK-NEXT: [[Y:%.*]] = xor <4 x i1> [[W]], +; CHECK-NEXT: [[Z:%.*]] = or <4 x i1> [[X]], [[Y]] +; CHECK-NEXT: ret <4 x i1> [[Z]] +; + %A = icmp eq <4 x i32> %ap, zeroinitializer + %B = icmp eq <4 x i32> %bp, zeroinitializer + %V = xor <4 x i1> %A, + %X = select <4 x i1> %B, <4 x i1> %V, <4 x i1> zeroinitializer + %W = or <4 x i1> %B, %A + %Y = xor <4 x i1> %W, + %Z = or <4 x i1> %X, %Y + ret <4 x i1> %Z +} diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index a279cab582d93..ddfa85cc4b642 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ -454,6 +454,60 @@ define <2 x i4> @and_or_not_or_commute7(<2 x i4> %A, <2 x i4> %B) { ret <2 x i4> %r } +; (~A & B) | ~(A | B) --> ~A with logical and +define i1 @and_or_not_or_logical(i1 %A, i1 %B) { +; CHECK-LABEL: @and_or_not_or_logical( +; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: [[X:%.*]] = select i1 [[V]], i1 [[B:%.*]], i1 false +; CHECK-NEXT: [[W:%.*]] = or i1 [[B]], [[A]] +; 
CHECK-NEXT: [[Y:%.*]] = xor i1 [[W]], true +; CHECK-NEXT: [[Z:%.*]] = or i1 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[Z]] +; + %V = xor i1 %A, true + %X = select i1 %V, i1 %B, i1 false + %W = or i1 %B, %A + %Y = xor i1 %W, true + %Z = or i1 %X, %Y + ret i1 %Z +} + +; (~B & A) | ~(A | B) --> ~A with logical and +define i1 @and_or_not_or_logical_rev(i1 %A, i1 %B) { +; CHECK-LABEL: @and_or_not_or_logical_rev( +; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: [[X:%.*]] = select i1 [[B:%.*]], i1 [[V]], i1 false +; CHECK-NEXT: [[W:%.*]] = or i1 [[B]], [[A]] +; CHECK-NEXT: [[Y:%.*]] = xor i1 [[W]], true +; CHECK-NEXT: [[Z:%.*]] = or i1 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[Z]] +; + %V = xor i1 %A, true + %X = select i1 %B, i1 %V, i1 false + %W = or i1 %B, %A + %Y = xor i1 %W, true + %Z = or i1 %X, %Y + ret i1 %Z +} + +; (~A & B) | ~(A | B) --> ~A with logical And and logical Or +define i1 @and_or_not_logical_or_logical_rev(i1 %A, i1 %B) { +; CHECK-LABEL: @and_or_not_logical_or_logical_rev( +; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: [[X:%.*]] = select i1 [[B:%.*]], i1 [[V]], i1 false +; CHECK-NEXT: [[W:%.*]] = select i1 [[B]], i1 true, i1 [[A]] +; CHECK-NEXT: [[Y:%.*]] = xor i1 [[W]], true +; CHECK-NEXT: [[Z:%.*]] = or i1 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[Z]] +; + %V = xor i1 %A, true + %X = select i1 %B, i1 %V, i1 false + %W = select i1 %B, i1 true, i1 %A + %Y = xor i1 %W, true + %Z = or i1 %X, %Y + ret i1 %Z +} + ; negative test - It is not safe to propagate an undef element from the 'not' op. 
define <2 x i4> @and_or_not_or_commute7_undef_elt(<2 x i4> %A, <2 x i4> %B) { From d9176563dc223589da43cccd47fe2584590c4b3b Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 7 Nov 2022 16:36:26 +0800 Subject: [PATCH 399/516] [X86] Add missing `IntrArgMemOnly` for intrinsics Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D137406 --- llvm/include/llvm/IR/IntrinsicsX86.td | 143 ++++++++++++++++---------- 1 file changed, 89 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 9ce993446d07e..72fbf8e9a4c8f 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5325,6 +5325,11 @@ let TargetPrefix = "x86" in { Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; + // AMX-FP16 - Intel FP16 AMX extensions + def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">, + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg>, ImmArg>, + ImmArg>]>; // AMX - internal intrinsics def int_x86_ldtilecfg_internal : ClangBuiltin<"__builtin_ia32_tile_loadconfig_internal">, @@ -5383,74 +5388,104 @@ let TargetPrefix = "x86" in { DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; } +//===----------------------------------------------------------------------===// let TargetPrefix = "x86" in { +// CMPCCXADD def int_x86_cmpccxadd32 : ClangBuiltin<"__builtin_ia32_cmpccxadd32">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [ImmArg>]>; + [IntrArgMemOnly, ImmArg>]>; def int_x86_cmpccxadd64 : ClangBuiltin<"__builtin_ia32_cmpccxadd64">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], - [ImmArg>]>; -} -//===----------------------------------------------------------------------===// -let TargetPrefix = "x86" in { -// AMX_FP16 - Intel FP16 AMX extensions -def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">, - 
DefaultAttrsIntrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], - [ImmArg>, ImmArg>, - ImmArg>]>; -def int_x86_vbcstnebf162ps128 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vbcstnebf162ps256 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">, - DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vbcstnesh2ps128 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vbcstnesh2ps256 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">, - DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneebf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneebf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">, - DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneeph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneeph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">, - DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneobf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneobf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">, - DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneoph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneoph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">, - DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneps2bf16128 : 
ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">, - DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [ IntrNoMem ]>; -def int_x86_vcvtneps2bf16256 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">, - DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [ IntrNoMem ]>; + [IntrArgMemOnly, ImmArg>]>; + +// AVX-NE-CONVERT +def int_x86_vbcstnebf162ps128 + : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vbcstnebf162ps256 + : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vbcstnesh2ps128 + : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vbcstnesh2ps256 + : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneebf162ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneebf162ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneeph2ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneeph2ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneobf162ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneobf162ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">, + 
DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneoph2ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneoph2ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneps2bf16128 + : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_x86_vcvtneps2bf16256 + : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // RAO-INT intrinsics let TargetPrefix = "x86" in { - def int_x86_aadd32 : ClangBuiltin<"__builtin_ia32_aadd32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_aadd64 : ClangBuiltin<"__builtin_ia32_aadd64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - def int_x86_aand32 : ClangBuiltin<"__builtin_ia32_aand32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_aand64 : ClangBuiltin<"__builtin_ia32_aand64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - def int_x86_aor32 : ClangBuiltin<"__builtin_ia32_aor32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_aor64 : ClangBuiltin<"__builtin_ia32_aor64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - def int_x86_axor32 : ClangBuiltin<"__builtin_ia32_axor32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_axor64 : ClangBuiltin<"__builtin_ia32_axor64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; + def int_x86_aadd32 + : ClangBuiltin<"__builtin_ia32_aadd32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_aadd64 + : ClangBuiltin<"__builtin_ia32_aadd64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], 
[IntrArgMemOnly]>; + def int_x86_aand32 + : ClangBuiltin<"__builtin_ia32_aand32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_aand64 + : ClangBuiltin<"__builtin_ia32_aand64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly]>; + def int_x86_aor32 + : ClangBuiltin<"__builtin_ia32_aor32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_aor64 + : ClangBuiltin<"__builtin_ia32_aor64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly]>; + def int_x86_axor32 + : ClangBuiltin<"__builtin_ia32_axor32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_axor64 + : ClangBuiltin<"__builtin_ia32_axor64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly]>; } //===----------------------------------------------------------------------===// From a2620e00ffa232a406de3a1d8634beeda86956fd Mon Sep 17 00:00:00 2001 From: OCHyams Date: Thu, 3 Nov 2022 09:50:31 +0000 Subject: [PATCH 400/516] [Assignment Tracking][3/*] Add DIAssignID metadata boilerplate The Assignment Tracking debug-info feature is outlined in this RFC: https://discourse.llvm.org/t/ rfc-assignment-tracking-a-better-way-of-specifying-variable-locations-in-ir Add the DIAssignID metadata attachment boilerplate. Includes a textual-bitcode roundtrip test and tests that the verifier and parser catch badly formed IR. This piece of metadata links together stores (used as an attachment) and the yet-to-be-added llvm.dbg.assign debug intrinsic (used as an operand). 
Reviewed By: jmorse Differential Revision: https://reviews.llvm.org/D132222 --- llvm/include/llvm-c/DebugInfo.h | 3 +- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 3 +- llvm/include/llvm/IR/DebugInfoMetadata.h | 36 +++++++++++++++++++ llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/include/llvm/IR/Metadata.def | 1 + llvm/lib/AsmParser/LLParser.cpp | 18 ++++++++++ llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 13 +++++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 11 ++++++ llvm/lib/IR/AsmWriter.cpp | 6 ++++ llvm/lib/IR/DebugInfo.cpp | 8 +++-- llvm/lib/IR/DebugInfoMetadata.cpp | 7 ++++ llvm/lib/IR/Verifier.cpp | 17 +++++++++ .../parse-and-verify/distinct.ll | 9 +++++ .../parse-and-verify/instruction-type.ll | 36 +++++++++++++++++++ .../parse-and-verify/operands.ll | 9 +++++ .../parse-and-verify/roundtrip.ll | 33 +++++++++++++++++ 16 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 8554a01998736..ef6a147eb2a52 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -169,7 +169,8 @@ enum { LLVMDICommonBlockMetadataKind, LLVMDIStringTypeMetadataKind, LLVMDIGenericSubrangeMetadataKind, - LLVMDIArgListMetadataKind + LLVMDIArgListMetadataKind, + LLVMDIAssignIDMetadataKind, }; typedef unsigned LLVMMetadataKind; diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index ee5669c6c6aa8..74a51d5ce6907 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -349,7 +349,8 @@ enum 
MetadataCodes { // info. METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...] METADATA_GENERIC_SUBRANGE = 45, // [distinct, count, lo, up, stride] - METADATA_ARG_LIST = 46 // [n x [type num, value num]] + METADATA_ARG_LIST = 46, // [n x [type num, value num]] + METADATA_ASSIGN_ID = 47, // [distinct, ...] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 5b20bf3ade99a..f57691f6f9fc6 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -215,6 +215,7 @@ class DINode : public MDNode { case DIImportedEntityKind: case DIModuleKind: case DIGenericSubrangeKind: + case DIAssignIDKind: return true; } } @@ -295,6 +296,41 @@ class GenericDINode : public DINode { } }; +/// Assignment ID. +/// Used to link stores (as an attachment) and dbg.assigns (as an operand). +/// DIAssignID metadata is never uniqued as we compare instances using +/// referential equality (the instance/address is the ID). +class DIAssignID : public MDNode { + friend class LLVMContextImpl; + friend class MDNode; + + DIAssignID(LLVMContext &C, StorageType Storage) + : MDNode(C, DIAssignIDKind, Storage, None) {} + + ~DIAssignID() { dropAllReferences(); } + + static DIAssignID *getImpl(LLVMContext &Context, StorageType Storage, + bool ShouldCreate = true); + + TempDIAssignID cloneImpl() const { return getTemporary(getContext()); } + +public: + // This node has no operands to replace. + void replaceOperandWith(unsigned I, Metadata *New) = delete; + + static DIAssignID *getDistinct(LLVMContext &Context) { + return getImpl(Context, Distinct); + } + static TempDIAssignID getTemporary(LLVMContext &Context) { + return TempDIAssignID(getImpl(Context, Temporary)); + } + // NOTE: Do not define get(LLVMContext&) - see class comment. 
+ + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIAssignIDKind; + } +}; + /// Array subrange. /// /// TODO: Merge into node for DW_TAG_array_type, which should have a custom diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 3d986325c5d33..8723bf2a0680c 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -49,3 +49,4 @@ LLVM_FIXED_MD_KIND(MD_memprof, "memprof", 34) LLVM_FIXED_MD_KIND(MD_callsite, "callsite", 35) LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36) LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) +LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index bbf349e6b508c..36c34c1d2347c 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -110,6 +110,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILabel) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_LEAF(DIAssignID) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 8767da472ed60..25204847ca9ce 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4681,6 +4681,24 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) { return false; } +/// parseDIAssignID: +/// ::= distinct !DIAssignID() +bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) { + if (!IsDistinct) + return Lex.Error("missing 'distinct', required for !DIAssignID()"); + + Lex.Lex(); + + // Now eat the parens. 
+ if (parseToken(lltok::lparen, "expected '(' here")) + return true; + if (parseToken(lltok::rparen, "expected ')' here")) + return true; + + Result = DIAssignID::getDistinct(Context); + return false; +} + /// parseGenericDINode: /// ::= !GenericDINode(tag: 15, header: "...", operands: {...}) bool LLParser::parseGenericDINode(MDNode *&Result, bool IsDistinct) { diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 02d76f61695af..1ac1502e8aefb 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -856,6 +856,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { case bitc::METADATA_TEMPLATE_VALUE: case bitc::METADATA_GLOBAL_VAR: case bitc::METADATA_LOCAL_VAR: + case bitc::METADATA_ASSIGN_ID: case bitc::METADATA_LABEL: case bitc::METADATA_EXPRESSION: case bitc::METADATA_OBJC_PROPERTY: @@ -1964,6 +1965,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } + case bitc::METADATA_ASSIGN_ID: { + if (Record.size() != 1) + return error("Invalid DIAssignID record."); + + IsDistinct = Record[0] & 1; + if (!IsDistinct) + return error("Invalid DIAssignID record. Must be distinct"); + + MetadataList.assignValue(DIAssignID::getDistinct(Context), NextMetadataNo); + NextMetadataNo++; + break; + } case bitc::METADATA_LOCAL_VAR: { // 10th field is for the obseleted 'inlinedAt:' field. 
if (Record.size() < 8 || Record.size() > 10) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 85ada2c6b52ef..4bf881a479170 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -340,6 +340,8 @@ class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { unsigned Abbrev); void writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev); + void writeDIAssignID(const DIAssignID *N, SmallVectorImpl &Record, + unsigned Abbrev); void writeDITemplateTypeParameter(const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev); @@ -1949,6 +1951,15 @@ void ModuleBitcodeWriter::writeDIModule(const DIModule *N, Record.clear(); } +void ModuleBitcodeWriter::writeDIAssignID(const DIAssignID *N, + SmallVectorImpl &Record, + unsigned Abbrev) { + // There are no arguments for this metadata type. + Record.push_back(N->isDistinct()); + Stream.EmitRecord(bitc::METADATA_ASSIGN_ID, Record, Abbrev); + Record.clear(); +} + void ModuleBitcodeWriter::writeDITemplateTypeParameter( const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev) { diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 21e662bed6b25..d49b8710bc9a4 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1865,6 +1865,12 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL, Out << ")"; } +static void writeDIAssignID(raw_ostream &Out, const DIAssignID *DL, + AsmWriterContext &WriterCtx) { + Out << "!DIAssignID()"; + MDFieldPrinter Printer(Out, WriterCtx); +} + static void writeDISubrange(raw_ostream &Out, const DISubrange *N, AsmWriterContext &WriterCtx) { Out << "!DISubrange("; diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 8f6d58cb90b90..d30fca63067c0 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -469,9 +469,13 @@ bool llvm::stripDebugInfo(Function &F) { if 
(NewLoopID != LoopID) I.setMetadata(LLVMContext::MD_loop, NewLoopID); } - // Strip heapallocsite attachments, they point into the DIType system. - if (I.hasMetadataOtherThanDebugLoc()) + // Strip other attachments that are or use debug info. + if (I.hasMetadataOtherThanDebugLoc()) { + // Heapallocsites point into the DIType system. I.setMetadata("heapallocsite", nullptr); + // DIAssignID are debug info metadata primitives. + I.setMetadata(LLVMContext::MD_DIAssignID, nullptr); + } } } return Changed; diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 9b4f92a63c5e2..5483595a78667 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1253,6 +1253,13 @@ bool DIExpression::startsWithDeref() const { return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref; } +DIAssignID *DIAssignID::getImpl(LLVMContext &Context, StorageType Storage, + bool ShouldCreate) { + // Uniqued DIAssignID are not supported as the instance address *is* the ID. 
+ assert(Storage != StorageType::Uniqued && "uniqued DIAssignID unsupported"); + return storeImpl(new (0u, Storage) DIAssignID(Context, Storage), Storage); +} + unsigned DIExpression::ExprOperand::getSize() const { uint64_t Op = getOp(); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 3c68f07cb230d..f0097da60c9af 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -471,6 +471,7 @@ class Verifier : public InstVisitor, VerifierSupport { void visitCallStackMetadata(MDNode *MD); void visitMemProfMetadata(Instruction &I, MDNode *MD); void visitCallsiteMetadata(Instruction &I, MDNode *MD); + void visitDIAssignIDMetadata(Instruction &I, MDNode *MD); void visitAnnotationMetadata(MDNode *Annotation); void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); @@ -1483,6 +1484,11 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) { CheckDI(!isa(Ty), "invalid type", &N, N.getType()); } +void Verifier::visitDIAssignID(const DIAssignID &N) { + CheckDI(!N.getNumOperands(), "DIAssignID has no arguments", &N); + CheckDI(N.isDistinct(), "DIAssignID must be distinct", &N); +} + void Verifier::visitDILabel(const DILabel &N) { if (auto *S = N.getRawScope()) CheckDI(isa(S), "invalid scope", &N, S); @@ -4529,6 +4535,14 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { } } +void Verifier::visitDIAssignIDMetadata(Instruction &I, MDNode *MD) { + assert(I.hasMetadata(LLVMContext::MD_DIAssignID)); + bool ExpectedInstTy = + isa(I) || isa(I) || isa(I); + CheckDI(ExpectedInstTy, "!DIAssignID attached to unexpected instruction kind", + I, MD); +} + void Verifier::visitCallStackMetadata(MDNode *MD) { // Call stack metadata should consist of a list of at least 1 constant int // (representing a hash of the location). 
@@ -4830,6 +4844,9 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite)) visitCallsiteMetadata(I, MD); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_DIAssignID)) + visitDIAssignIDMetadata(I, MD); + if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll new file mode 100644 index 0000000000000..2cc5452fe7d2a --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. + +; CHECK: error: missing 'distinct', required for !DIAssignID() + +!1 = !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll new file mode 100644 index 0000000000000..d0f447ee200b6 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll @@ -0,0 +1,36 @@ +; RUN: opt -S %s -verify -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; NOTE: Expect opt to return zero because the badly formed debug info +;; is going to be stripped. + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. + +;; Check verifier output. +; CHECK: !DIAssignID attached to unexpected instruction kind + +;; Check DIAssignID is stripped from IR. 
+; CHECK: define dso_local void @fun() { +; CHECK-NOT: DIAssignID + +define dso_local void @fun() !dbg !7 { +entry: + ret void, !DIAssignID !14 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!14 = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll new file mode 100644 index 0000000000000..79adcb9ce2d12 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. 
+ +; CHECK: error: expected ')' here + +!1 = distinct !DIAssignID(0) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll new file mode 100644 index 0000000000000..1ddb95b79b0f0 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll @@ -0,0 +1,33 @@ +; RUN: opt %s -verify -experimental-assignment-tracking \ +; RUN: | opt -verify -S -experimental-assignment-tracking \ +; RUN: | FileCheck %s + +;; Roundtrip test (text -> bitcode -> text) for DIAssignID attachments. + +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID:[0-9]+]] +; CHECK-DAG: ![[ID]] = distinct !DIAssignID() + +define dso_local void @fun() !dbg !7 { +entry: + %local = alloca i32, align 4, !DIAssignID !14 + ret void, !dbg !13 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 2, type: !11) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 3, column: 1, scope: !7) +!14 = distinct !DIAssignID() From 83255c4a626df5df539a71fba864c99fcb1cb674 Mon Sep 17 00:00:00 2001 From: "chenglin.bi" Date: Mon, 7 Nov 2022 17:15:59 +0800 Subject: [PATCH 401/516] Recommit [AArch64] Improve 
codegen for shifted mask op The original change compared `APInt` values to check whether the constants are the same. But the shift amounts may have different constant types. So this patch changes to using `getZExtValue` to compare the constant values. Original comment: The special case for bit extraction pattern is `((x >> C) & mask) << C`. It can be combined to `x & (mask << C)` by returning true in isDesirableToCommuteWithShift. Fix: #56427 Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D136014 --- .../Target/AArch64/AArch64ISelLowering.cpp | 20 ++++++--- llvm/test/CodeGen/AArch64/shift-logic.ll | 43 +++++++++++++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6571ddd7cb12b..fce26dd6c21f5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14442,15 +14442,23 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, SDValue ShiftLHS = N->getOperand(0); EVT VT = N->getValueType(0); - // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine - // it with shift 'N' to let it be lowered to UBFX. + // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not + // combine it with shift 'N' to let it be lowered to UBFX except: + // ((x >> C) & mask) << C. 
if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && isa(ShiftLHS.getOperand(1))) { uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1); - if (isMask_64(TruncMask) && - ShiftLHS.getOperand(0).getOpcode() == ISD::SRL && - isa(ShiftLHS.getOperand(0).getOperand(1))) - return false; + if (isMask_64(TruncMask)) { + SDValue AndLHS = ShiftLHS.getOperand(0); + if (AndLHS.getOpcode() == ISD::SRL) { + if (auto *SRLC = dyn_cast(AndLHS.getOperand(1))) { + if (N->getOpcode() == ISD::SHL) + if (auto *SHLC = dyn_cast(N->getOperand(1))) + return SRLC->getZExtValue() == SHLC->getZExtValue(); + return false; + } + } + } } return true; } diff --git a/llvm/test/CodeGen/AArch64/shift-logic.ll b/llvm/test/CodeGen/AArch64/shift-logic.ll index af684bbb8aff7..be1ddccf901b8 100644 --- a/llvm/test/CodeGen/AArch64/shift-logic.ll +++ b/llvm/test/CodeGen/AArch64/shift-logic.ll @@ -151,3 +151,46 @@ define i32 @lshr_or_extra_use(i32 %x, i32 %y, i32* %p) nounwind { %sh1 = lshr i32 %r, 7 ret i32 %sh1 } + +define i64 @desirable_to_commute1(i64 %x) { +; CHECK-LABEL: desirable_to_commute1: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, #0x7fff8 +; CHECK-NEXT: ret + %s1 = lshr i64 %x, 3 + %a = and i64 %s1, 65535 + %s2 = shl i64 %a, 3 + ret i64 %s2 +} + +define i64 @desirable_to_commute2(i64* %p, i64 %i) { +; CHECK-LABEL: desirable_to_commute2: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x1, #0x1ff8 +; CHECK-NEXT: ldr x0, [x0, x8] +; CHECK-NEXT: ret + %lshr = lshr i64 %i, 3 + %and = and i64 %lshr, 1023 + %pidx = getelementptr i64, i64* %p, i64 %and + %r = load i64, i64* %pidx + ret i64 %r +} + +; Shrink demanded op will shrink the shl to i32, +; Lshr and shl will have different shift amount type. +; Compare apint will cause crash when type is different. 
+define void @apint_type_mismatch(i16 %a, i32* %p) { +; CHECK-LABEL: apint_type_mismatch: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w0, #0x7f8 +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret +entry: + %lshr = lshr i16 %a, 3 + %and = and i16 %lshr, 255 + %zext = zext i16 %and to i64 + %shl = shl i64 %zext, 3 + %trunc = trunc i64 %shl to i32 + store i32 %trunc, i32* %p + ret void +} From eb421c0c0edf8420b00bc8d51bb1160c3eda9661 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Mon, 7 Nov 2022 04:10:06 -0500 Subject: [PATCH 402/516] [PowerPC][NFC] fix the LIT regressions This is to fix the wrong check introduced in D64195. `std {{[0-9]+}}, 16(1)` is the store for the lr register. That check breaks the testing point that existed before D64195. --- llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll | 2 +- llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll index af24164496ffd..0a448a934d74a 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll @@ -50,7 +50,7 @@ entry: ret void } ; CHECK: @caller2 -; CHECK: std {{[0-9]+}}, 16(1) +; CHECK: stw {{[0-9]+}}, 156(1) ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll index fa2e09a8e4b6b..39948fec8150b 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -50,7 +50,7 @@ entry: ret void } ; CHECK: @caller2 -; CHECK: std {{[0-9]+}}, 16({{[0-9]+}}) +; CHECK: stw {{[0-9]+}}, 136({{[0-9]+}}) ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) From c8be35293c82f216e975c56f62ddf9199a22f2e2 Mon Sep 17 00:00:00 2001 
From: Thomas Preud'homme Date: Fri, 21 Oct 2022 15:42:56 +0100 Subject: [PATCH 403/516] [SWP] Recognize mem carried dep with different base The loop-carried dependency detection logic in isLoopCarriedDep relies on the load and store using the same definition for the base register. This misses the case of post-increment loads and stores whose base registers are different PHIs initialized from the same initial value. This commit extends the logic to accept the load and store having different PHI base addresses provided that they have the same initial value when entering the loop and are incremented by the same amount in each iteration. Reviewed By: bcahoon Differential Revision: https://reviews.llvm.org/D136463 --- llvm/lib/CodeGen/MachinePipeliner.cpp | 26 +++++--- .../test/CodeGen/Hexagon/swp-carried-dep3.mir | 60 +++++++++++++++++++ 2 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 721bd52448ace..3333cbd109586 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -2277,20 +2277,28 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, assert(!OffsetSIsScalable && !OffsetDIsScalable && "Expected offsets to be byte offsets"); - if (!BaseOpS->isIdenticalTo(*BaseOpD)) + MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg()); + MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg()); + if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI()) + return true; + + unsigned InitValS = 0; + unsigned LoopValS = 0; + unsigned InitValD = 0; + unsigned LoopValD = 0; + getPhiRegs(*DefS, BB, InitValS, LoopValS); + getPhiRegs(*DefD, BB, InitValD, LoopValD); + MachineInstr *InitDefS = MRI.getVRegDef(InitValS); + MachineInstr *InitDefD = MRI.getVRegDef(InitValD); + + if (!InitDefS->isIdenticalTo(*InitDefD)) return true; // Check that the base register is incremented by a 
constant value for each // iteration. - MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg()); - if (!Def || !Def->isPHI()) - return true; - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(*Def, BB, InitVal, LoopVal); - MachineInstr *LoopDef = MRI.getVRegDef(LoopVal); + MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS); int D = 0; - if (!LoopDef || !TII->getIncrementValue(*LoopDef, D)) + if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D)) return true; uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize(); diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir new file mode 100644 index 0000000000000..a1b0aec4cf81c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir @@ -0,0 +1,60 @@ +# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s +# REQUIRES: asserts + +# Test that the loop carried dependence check correctly identifies a recurrence +# when load and store use distinct increment for their pointer. To test this, +# we check that we don't have the Rec NodeSet containing SU(5) and SU(7) which +# requires to use a single CHECK-NOT to match such a Rec NodeSet. Fortunately +# the atom '.' does not match a newline but anything else on a line. + +# CHECK-NOT: Rec NodeSet{{.+[[:space:]]}} SU(5){{.+[[:space:]]}} SU(7) + +... 
+--- +name: test +tracksRegLiveness: true + +body: | + bb.0: + successors: %bb.3, %bb.1 + liveins: $r0, $r1, $r2 + + %14:intregs = COPY $r2 + %13:intregs = COPY $r1 + %12:intregs = COPY $r0 + %16:predregs = C2_cmpeqi %14, 2 + %15:intregs = A2_tfrsi 0 + J2_jumpt killed %16, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2 + + %0:intregs = A2_addi %14, -2 + %1:intregs = A2_addi %12, 10 + %2:intregs = A2_addi %13, 4 + %17:intregs = A2_tfrsi 0 + %23:intregs = COPY %0 + J2_loop0r %bb.2, %23, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2 (machine-block-address-taken): + successors: %bb.3, %bb.2 + + %3:intregs = PHI %2, %bb.1, %10, %bb.2 + %4:intregs = PHI %1, %bb.1, %9, %bb.2 + %21:intregs = PHI %1, %bb.1, %22, %bb.2 + %6:intregs = PHI %17, %bb.1, %7, %bb.2 + %18:intregs, %10:intregs = L2_loadrh_pi %3, 2 :: (load (s16)) + %19:intregs, %22:intregs = L2_loadrh_pi %21, 2 :: (load (s16)) + %20:intregs = A2_addi %18, 10 + %9:intregs = S2_storerh_pi %4, 2, killed %20 :: (store (s16)) + %7:intregs = M2_acci %19, %6, %18 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3: + %11:intregs = PHI %15, %bb.0, %7, %bb.2 + $r0 = COPY %11 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +... From b46427b9a2fa2587e9087ab7e2bbb2c5abbb30e3 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 7 Nov 2022 10:03:18 +0000 Subject: [PATCH 404/516] [InstSimplify] (~A & B) | ~(A | B) --> ~A with logical and According to https://alive2.llvm.org/ce/z/opsdrb, it is valid to convert (~A & B) | ~(A | B) --> ~A even if the And is a Logical And. This came up from the vector masking of predicated blocks. 
Differential Revision: https://reviews.llvm.org/D137435 --- llvm/lib/Analysis/InstructionSimplify.cpp | 9 +++++++++ llvm/test/Transforms/InstCombine/or.ll | 10 ++-------- llvm/test/Transforms/InstSimplify/or.ll | 18 +++--------------- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index acfac8cb9437c..98db224e9e809 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2198,6 +2198,7 @@ Value *llvm::simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::simplifyAndInst(Op0, Op1, Q, RecursionLimit); } +// TODO: Many of these folds could use LogicalAnd/LogicalOr. static Value *simplifyOrLogic(Value *X, Value *Y) { assert(X->getType() == Y->getType() && "Expected same type for 'or' ops"); Type *Ty = X->getType(); @@ -2262,6 +2263,14 @@ static Value *simplifyOrLogic(Value *X, Value *Y) { m_Value(B))) && match(Y, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return NotA; + // The same is true of Logical And + // TODO: This could share the logic of the version above if there was a + // version of LogicalAnd that allowed more than just i1 types. 
+ if (match(X, m_c_LogicalAnd( + m_CombineAnd(m_Value(NotA), m_NotForbidUndef(m_Value(A))), + m_Value(B))) && + match(Y, m_Not(m_c_LogicalOr(m_Specific(A), m_Specific(B))))) + return NotA; // ~(A ^ B) | (A & B) --> ~(A ^ B) // ~(A ^ B) | (B & A) --> ~(A ^ B) diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index fa10eefb87987..25cf241f44c07 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -1564,14 +1564,8 @@ define i32 @mul_common_bits(i32 %p) { define <4 x i1> @and_or_not_or_logical_vec(<4 x i32> %ap, <4 x i32> %bp) { ; CHECK-LABEL: @and_or_not_or_logical_vec( -; CHECK-NEXT: [[A:%.*]] = icmp eq <4 x i32> [[AP:%.*]], zeroinitializer -; CHECK-NEXT: [[B:%.*]] = icmp eq <4 x i32> [[BP:%.*]], zeroinitializer -; CHECK-NEXT: [[V:%.*]] = xor <4 x i1> [[A]], -; CHECK-NEXT: [[X:%.*]] = select <4 x i1> [[B]], <4 x i1> [[V]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[W:%.*]] = or <4 x i1> [[B]], [[A]] -; CHECK-NEXT: [[Y:%.*]] = xor <4 x i1> [[W]], -; CHECK-NEXT: [[Z:%.*]] = or <4 x i1> [[X]], [[Y]] -; CHECK-NEXT: ret <4 x i1> [[Z]] +; CHECK-NEXT: [[A:%.*]] = icmp ne <4 x i32> [[AP:%.*]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[A]] ; %A = icmp eq <4 x i32> %ap, zeroinitializer %B = icmp eq <4 x i32> %bp, zeroinitializer diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index ddfa85cc4b642..913b760dd331c 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ -458,11 +458,7 @@ define <2 x i4> @and_or_not_or_commute7(<2 x i4> %A, <2 x i4> %B) { define i1 @and_or_not_or_logical(i1 %A, i1 %B) { ; CHECK-LABEL: @and_or_not_or_logical( ; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true -; CHECK-NEXT: [[X:%.*]] = select i1 [[V]], i1 [[B:%.*]], i1 false -; CHECK-NEXT: [[W:%.*]] = or i1 [[B]], [[A]] -; CHECK-NEXT: [[Y:%.*]] = xor i1 [[W]], true -; CHECK-NEXT: [[Z:%.*]] = or i1 [[X]], [[Y]] -; 
CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 [[V]] ; %V = xor i1 %A, true %X = select i1 %V, i1 %B, i1 false @@ -476,11 +472,7 @@ define i1 @and_or_not_or_logical(i1 %A, i1 %B) { define i1 @and_or_not_or_logical_rev(i1 %A, i1 %B) { ; CHECK-LABEL: @and_or_not_or_logical_rev( ; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true -; CHECK-NEXT: [[X:%.*]] = select i1 [[B:%.*]], i1 [[V]], i1 false -; CHECK-NEXT: [[W:%.*]] = or i1 [[B]], [[A]] -; CHECK-NEXT: [[Y:%.*]] = xor i1 [[W]], true -; CHECK-NEXT: [[Z:%.*]] = or i1 [[X]], [[Y]] -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 [[V]] ; %V = xor i1 %A, true %X = select i1 %B, i1 %V, i1 false @@ -494,11 +486,7 @@ define i1 @and_or_not_or_logical_rev(i1 %A, i1 %B) { define i1 @and_or_not_logical_or_logical_rev(i1 %A, i1 %B) { ; CHECK-LABEL: @and_or_not_logical_or_logical_rev( ; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true -; CHECK-NEXT: [[X:%.*]] = select i1 [[B:%.*]], i1 [[V]], i1 false -; CHECK-NEXT: [[W:%.*]] = select i1 [[B]], i1 true, i1 [[A]] -; CHECK-NEXT: [[Y:%.*]] = xor i1 [[W]], true -; CHECK-NEXT: [[Z:%.*]] = or i1 [[X]], [[Y]] -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 [[V]] ; %V = xor i1 %A, true %X = select i1 %B, i1 %V, i1 false From c37f29c49ea237eaf65dd137bffc1f3316f82951 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Mon, 7 Nov 2022 09:31:45 +0000 Subject: [PATCH 405/516] [Assignment Tracking][4/*] Add llvm.dbg.assign intrinsic boilerplate The Assignment Tracking debug-info feature is outlined in this RFC: https://discourse.llvm.org/t/ rfc-assignment-tracking-a-better-way-of-specifying-variable-locations-in-ir Add the llvm.dbg.assign intrinsic boilerplate. This updates the textual-bitcode roundtrip test to also check that round-tripping with the intrinsic works. The intrinsic marks the position of a source level assignment. 
The llvm.dbg.assign interface looks like this (each parameter is wrapped in MetadataAsValue, and Value * type parameters are first wrapped in ValueAsMetadata): void @llvm.dbg.assign(Value *Value, DIExpression *ValueExpression, DILocalVariable *Variable, DIAssignID *ID, Value *Address, DIExpression *AddressExpression) The first three parameters look and behave like an llvm.dbg.value. ID is a reference to a store. The intrinsic is "linked to" instructions in the same function that use the same ID as an attachment. That is mostly conceptual at this point; the two-way link infrastructure will come in another patch. Address is the destination address of the store and it is modified by AddressExpression. LLVM currently encodes variable fragment information in DIExpressions, so as an implementation quirk the FragmentInfo for Variable is contained within ValueExpression only. Reviewed By: jmorse Differential Revision: https://reviews.llvm.org/D132223 --- llvm/include/llvm/IR/IntrinsicInst.h | 65 ++++++++++++- llvm/include/llvm/IR/Intrinsics.td | 7 ++ llvm/lib/IR/IntrinsicInst.cpp | 41 ++++++++- llvm/lib/IR/Verifier.cpp | 24 +++++ .../parse-and-verify/roundtrip.ll | 92 ++++++++++++++++++- .../parse-and-verify/verify.ll | 52 +++++++++++ 6 files changed, 270 insertions(+), 11 deletions(-) create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 4ff48c3669d50..f78e45c0e32ee 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -91,6 +91,7 @@ class IntrinsicInst : public CallInst { case Intrinsic::assume: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: + case Intrinsic::dbg_assign: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: @@ -129,6 +130,7 @@ static inline bool isDbgInfoIntrinsic(Intrinsic::ID ID) { case Intrinsic::dbg_value: case Intrinsic::dbg_addr: case 
Intrinsic::dbg_label: + case Intrinsic::dbg_assign: return true; default: return false; @@ -231,10 +233,12 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { bool hasArgList() const { return isa(getRawLocation()); } - /// Does this describe the address of a local variable. True for dbg.addr - /// and dbg.declare, but not dbg.value, which describes its value. + /// Does this describe the address of a local variable. True for dbg.addr and + /// dbg.declare, but not dbg.value, which describes its value, or dbg.assign, + /// which describes a combination of the variable's value and address. bool isAddressOfVariable() const { - return getIntrinsicID() != Intrinsic::dbg_value; + return getIntrinsicID() != Intrinsic::dbg_value && + getIntrinsicID() != Intrinsic::dbg_assign; } void setUndef() { @@ -286,6 +290,11 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { /// is described. Optional getFragmentSizeInBits() const; + /// Get the FragmentInfo for the variable. + Optional getFragment() const { + return getExpression()->getFragmentInfo(); + } + /// \name Casting methods /// @{ static bool classof(const IntrinsicInst *I) { @@ -293,6 +302,7 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_addr: + case Intrinsic::dbg_assign: return true; default: return false; @@ -302,7 +312,7 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { return isa(V) && classof(cast(V)); } /// @} -private: +protected: void setArgOperand(unsigned i, Value *v) { DbgInfoIntrinsic::setArgOperand(i, v); } @@ -363,7 +373,52 @@ class DbgValueInst : public DbgVariableIntrinsic { /// \name Casting methods /// @{ static bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::dbg_value; + return I->getIntrinsicID() == Intrinsic::dbg_value || + I->getIntrinsicID() == Intrinsic::dbg_assign; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + /// @} 
+}; + +/// This represents the llvm.dbg.assign instruction. +class DbgAssignIntrinsic : public DbgValueInst { + enum Operands { + OpValue, + OpVar, + OpExpr, + OpAssignID, + OpAddress, + OpAddressExpr, + }; + +public: + Value *getAddress() const; + Metadata *getRawAddress() const { + return cast(getArgOperand(OpAddress))->getMetadata(); + } + Metadata *getRawAssignID() const { + return cast(getArgOperand(OpAssignID))->getMetadata(); + } + DIAssignID *getAssignID() const { return cast(getRawAssignID()); } + Metadata *getRawAddressExpression() const { + return cast(getArgOperand(OpAddressExpr))->getMetadata(); + } + DIExpression *getAddressExpression() const { + return cast(getRawAddressExpression()); + } + void setAddressExpression(DIExpression *NewExpr) { + setArgOperand(OpAddressExpr, + MetadataAsValue::get(NewExpr->getContext(), NewExpr)); + } + void setAssignId(DIAssignID *New); + void setAddress(Value *V); + void setValue(Value *V); + /// \name Casting methods + /// @{ + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::dbg_assign; } static bool classof(const Value *V) { return isa(V) && classof(cast(V)); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e8fb5c4aef191..3050bd2acec73 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -998,6 +998,13 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { [llvm_metadata_ty, llvm_metadata_ty, llvm_metadata_ty]>; + def int_dbg_assign : DefaultAttrsIntrinsic<[], + [llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty]>; def int_dbg_label : DefaultAttrsIntrinsic<[], [llvm_metadata_ty]>; } diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 8ca75f58e4033..b6537b2077ebe 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -112,10 +112,23 @@ static ValueAsMetadata 
*getAsMetadata(Value *V) { void DbgVariableIntrinsic::replaceVariableLocationOp(Value *OldValue, Value *NewValue) { + // If OldValue is used as the address part of a dbg.assign intrinsic replace + // it with NewValue and return true. + auto ReplaceDbgAssignAddress = [this, OldValue, NewValue]() -> bool { + auto *DAI = dyn_cast(this); + if (!DAI || OldValue != DAI->getAddress()) + return false; + DAI->setAddress(NewValue); + return true; + }; + bool DbgAssignAddrReplaced = ReplaceDbgAssignAddress(); + (void)DbgAssignAddrReplaced; + assert(NewValue && "Values must be non-null"); auto Locations = location_ops(); auto OldIt = find(Locations, OldValue); - assert(OldIt != Locations.end() && "OldValue must be a current location"); + assert((OldIt != Locations.end() || DbgAssignAddrReplaced) && + "OldValue must be a current location"); if (!hasArgList()) { Value *NewOperand = isa(NewValue) ? NewValue @@ -172,6 +185,32 @@ Optional DbgVariableIntrinsic::getFragmentSizeInBits() const { return getVariable()->getSizeInBits(); } +Value *DbgAssignIntrinsic::getAddress() const { + auto *MD = getRawAddress(); + if (auto *V = dyn_cast(MD)) + return V->getValue(); + + // When the value goes to null, it gets replaced by an empty MDNode. 
+ assert(!cast(MD)->getNumOperands() && "Expected an empty MDNode"); + return nullptr; +} + +void DbgAssignIntrinsic::setAssignId(DIAssignID *New) { + setOperand(OpAssignID, MetadataAsValue::get(getContext(), New)); +} + +void DbgAssignIntrinsic::setAddress(Value *V) { + assert(V->getType()->isPointerTy() && + "Destination Component must be a pointer type"); + setOperand(OpAddress, + MetadataAsValue::get(getContext(), ValueAsMetadata::get(V))); +} + +void DbgAssignIntrinsic::setValue(Value *V) { + setOperand(OpValue, + MetadataAsValue::get(getContext(), ValueAsMetadata::get(V))); +} + int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, StringRef Name) { assert(Name.startswith("llvm.")); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index f0097da60c9af..324fa66fb2ea9 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4541,6 +4541,15 @@ void Verifier::visitDIAssignIDMetadata(Instruction &I, MDNode *MD) { isa(I) || isa(I) || isa(I); CheckDI(ExpectedInstTy, "!DIAssignID attached to unexpected instruction kind", I, MD); + // Iterate over the MetadataAsValue uses of the DIAssignID - these should + // only be found as DbgAssignIntrinsic operands. + if (auto *AsValue = MetadataAsValue::getIfExists(Context, MD)) { + for (auto *User : AsValue->users()) { + CheckDI(isa(User), + "!DIAssignID should only be used by llvm.dbg.assign intrinsics", + MD, User); + } + } } void Verifier::visitCallStackMetadata(MDNode *MD) { @@ -5023,6 +5032,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::dbg_value: // llvm.dbg.value visitDbgIntrinsic("value", cast(Call)); break; + case Intrinsic::dbg_assign: // llvm.dbg.assign + visitDbgIntrinsic("assign", cast(Call)); + break; case Intrinsic::dbg_label: // llvm.dbg.label visitDbgLabelIntrinsic("label", cast(Call)); break; @@ -5986,6 +5998,18 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { "invalid llvm.dbg." 
+ Kind + " intrinsic expression", &DII, DII.getRawExpression()); + if (auto *DAI = dyn_cast(&DII)) { + CheckDI(isa(DAI->getRawAssignID()), + "invalid llvm.dbg.assign intrinsic DIAssignID", &DII, + DAI->getRawAssignID()); + CheckDI(isa(DAI->getRawAddress()), + "invalid llvm.dbg.assign intrinsic address)", &DII, + DAI->getRawAddress()); + CheckDI(isa(DAI->getRawAddressExpression()), + "invalid llvm.dbg.assign intrinsic address expression", &DII, + DAI->getRawAddressExpression()); + } + // Ignore broken !dbg attachments; they're checked elsewhere. if (MDNode *N = DII.getDebugLoc().getAsMDNode()) if (!isa(N)) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll index 1ddb95b79b0f0..808636a1a0e58 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll @@ -2,17 +2,81 @@ ; RUN: | opt -verify -S -experimental-assignment-tracking \ ; RUN: | FileCheck %s -;; Roundtrip test (text -> bitcode -> text) for DIAssignID attachments. - -; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID:[0-9]+]] -; CHECK-DAG: ![[ID]] = distinct !DIAssignID() +;; Roundtrip test (text -> bitcode -> text) for DIAssignID metadata and +;; llvm.dbg.assign intrinsics. +;; DIAssignID attachment only. +; CHECK-LABEL: @fun() +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID1:[0-9]+]] define dso_local void @fun() !dbg !7 { entry: %local = alloca i32, align 4, !DIAssignID !14 ret void, !dbg !13 } +;; Unlinked llvm.dbg.assign. 
+; CHECK-DAG: @fun2() +; CHECK: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR2:[0-9]+]], metadata !DIExpression(), metadata ![[ID2:[0-9]+]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG2:[0-9]+]] +define dso_local void @fun2() !dbg !15 { +entry: + %local = alloca i32, align 4 + call void @llvm.dbg.assign(metadata i32 undef, metadata !16, metadata !DIExpression(), metadata !18, metadata i32 undef, metadata !DIExpression()), !dbg !17 + ret void, !dbg !17 +} + +;; An llvm.dbg.assign linked to an alloca. +; CHECK-LABEL: @fun3() +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID3:[0-9]+]] +; CHECK-NEXT: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR3:[0-9]+]], metadata !DIExpression(), metadata ![[ID3]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG3:[0-9]+]] +define dso_local void @fun3() !dbg !19 { +entry: + %local = alloca i32, align 4, !DIAssignID !22 + call void @llvm.dbg.assign(metadata i32 undef, metadata !20, metadata !DIExpression(), metadata !22, metadata i32 undef, metadata !DIExpression()), !dbg !21 + ret void, !dbg !21 +} + +;; Check that using a DIAssignID as an operand before using it as an attachment +;; works (the order of the alloca and dbg.assign has been swapped). +; CHECK-LABEL: @fun4() +; CHECK: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR4:[0-9]+]], metadata !DIExpression(), metadata ![[ID4:[0-9]+]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG4:[0-9]+]] +; CHECK-NEXT: %local = alloca i32, align 4, !DIAssignID ![[ID4]] +define dso_local void @fun4() !dbg !23 { +entry: + call void @llvm.dbg.assign(metadata i32 undef, metadata !24, metadata !DIExpression(), metadata !26, metadata i32 undef, metadata !DIExpression()), !dbg !25 + %local = alloca i32, align 4, !DIAssignID !26 + ret void, !dbg !25 +} + +;; Check that the value and address operands print correctly. +;; There are currently no plans to support DIArgLists for the address component. 
+; CHECK-LABEL: @fun5 +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID5:[0-9]+]] +; CHECK-NEXT: llvm.dbg.assign(metadata i32 %v, metadata ![[VAR5:[0-9]+]], metadata !DIExpression(), metadata ![[ID5]], metadata i32* %local, metadata !DIExpression()), !dbg ![[DBG5:[0-9]+]] +; CHECK-NEXT: llvm.dbg.assign(metadata !DIArgList(i32 %v, i32 1), metadata ![[VAR5]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_stack_value), metadata ![[ID5]], metadata i32* %local, metadata !DIExpression()), !dbg ![[DBG5]] +define dso_local void @fun5(i32 %v) !dbg !27 { +entry: + %local = alloca i32, align 4, !DIAssignID !30 + call void @llvm.dbg.assign(metadata i32 %v, metadata !28, metadata !DIExpression(), metadata !30, metadata i32* %local, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.assign(metadata !DIArgList(i32 %v, i32 1), metadata !28, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_stack_value), metadata !30, metadata i32* %local, metadata !DIExpression()), !dbg !29 + ret void +} + +; CHECK-DAG: ![[ID1]] = distinct !DIAssignID() +; CHECK-DAG: ![[ID2]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR2]] = !DILocalVariable(name: "local2", +; CHECK-DAG: ![[DBG2]] = !DILocation(line: 2 +; CHECK-DAG: ![[ID3]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR3]] = !DILocalVariable(name: "local3", +; CHECK-DAG: ![[DBG3]] = !DILocation(line: 3, +; CHECK-DAG: ![[ID4]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR4]] = !DILocalVariable(name: "local4", +; CHECK-DAG: ![[DBG4]] = !DILocation(line: 4, +; CHECK-DAG: ![[ID5]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR5]] = !DILocalVariable(name: "local5", +; CHECK-DAG: ![[DBG5]] = !DILocation(line: 5, + +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5} !llvm.ident = !{!6} @@ -29,5 +93,23 @@ entry: !9 = !{null} !10 = !DILocalVariable(name: "local", scope: !7, file: !1, 
line: 2, type: !11) !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!13 = !DILocation(line: 3, column: 1, scope: !7) +!13 = !DILocation(line: 1, column: 1, scope: !7) !14 = distinct !DIAssignID() +!15 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!16 = !DILocalVariable(name: "local2", scope: !15, file: !1, line: 2, type: !11) +!17 = !DILocation(line: 2, column: 1, scope: !15) +!18 = distinct !DIAssignID() +!19 = distinct !DISubprogram(name: "fun3", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!20 = !DILocalVariable(name: "local3", scope: !19, file: !1, line: 2, type: !11) +!21 = !DILocation(line: 3, column: 1, scope: !19) +!22 = distinct !DIAssignID() +!23 = distinct !DISubprogram(name: "fun4", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!24 = !DILocalVariable(name: "local4", scope: !23, file: !1, line: 2, type: !11) +!25 = !DILocation(line: 4, column: 1, scope: !23) +!26 = distinct !DIAssignID() +!27 = distinct !DISubprogram(name: "fun5", scope: !1, file: !1, line: 1, type: !31, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!28 = !DILocalVariable(name: "local5", scope: !27, file: !1, line: 2, type: !11) +!29 = !DILocation(line: 5, column: 1, scope: !27) +!30 = distinct !DIAssignID() +!31 = !DISubroutineType(types: !32) +!32 = !{null, !11} diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll new file mode 100644 index 0000000000000..577289604d536 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll @@ -0,0 +1,52 @@ +; RUN: opt %s -S -verify -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly 
formed assignment tracking metadata is caught either +;; while parsing or by the verifier. +;; +;; Checks for this one are inline. + +define dso_local void @fun() !dbg !7 { +entry: + %a = alloca i32, align 4, !DIAssignID !14 + ;; Here something other than a dbg.assign intrinsic is using a DIAssignID. + ; CHECK: !DIAssignID should only be used by llvm.dbg.assign intrinsics + call void @llvm.dbg.value(metadata !14, metadata !10, metadata !DIExpression()), !dbg !13 + + ;; Each following dbg.assign has an argument of the incorrect type. + ; CHECK: invalid llvm.dbg.assign intrinsic address/value + call void @llvm.dbg.assign(metadata !3, metadata !10, metadata !DIExpression(), metadata !14, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic variable + call void @llvm.dbg.assign(metadata i32 0, metadata !2, metadata !DIExpression(), metadata !14, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic expression + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !2, metadata !14, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic DIAssignID + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !DIExpression(), metadata !2, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic address + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !DIExpression(), metadata !14, metadata !3, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic address expression + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !DIExpression(), metadata !14, metadata i32* undef, metadata !2), !dbg !13 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} 
+!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 2, type: !11) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 1, column: 1, scope: !7) +!14 = distinct !DIAssignID() From 2bf960aef08e93d687f21e6d636186561b56cbf3 Mon Sep 17 00:00:00 2001 From: Christian Kandeler Date: Tue, 21 Jun 2022 15:56:21 +0200 Subject: [PATCH 406/516] [clangd] Add "usedAsMutablePointer" highlighting modifier Counterpart to "usedAsMutableReference". Just as for references, there are const and non-const pointer parameters, and it's valuable to be able to have different highlighting for the two cases at the call site. We could have re-used the existing modifier, but having a dedicated one maximizes client flexibility. 
Reviewed By: nridge Differential Revision: https://reviews.llvm.org/D130015 --- .../clangd/SemanticHighlighting.cpp | 21 ++++++++++++++----- .../clangd/SemanticHighlighting.h | 1 + .../clangd/test/initialize-params.test | 1 + .../clangd/test/semantic-tokens.test | 8 +++---- .../unittests/SemanticHighlightingTests.cpp | 6 +++--- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index af3a3e6f8e941..dd9392b029df8 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -597,19 +597,27 @@ class CollectExtraHighlightings if (!Arg) return; - // Is this parameter passed by non-const reference? + // Is this parameter passed by non-const pointer or reference? // FIXME The condition T->idDependentType() could be relaxed a bit, // e.g. std::vector& is dependent but we would want to highlight it - if (!T->isLValueReferenceType() || - T.getNonReferenceType().isConstQualified() || T->isDependentType()) { + bool IsRef = T->isLValueReferenceType(); + bool IsPtr = T->isPointerType(); + if ((!IsRef && !IsPtr) || T->getPointeeType().isConstQualified() || + T->isDependentType()) { return; } llvm::Optional Location; - // FIXME Add "unwrapping" for ArraySubscriptExpr and UnaryOperator, + // FIXME Add "unwrapping" for ArraySubscriptExpr, // e.g. highlight `a` in `a[i]` // FIXME Handle dependent expression types + if (auto *IC = dyn_cast(Arg)) + Arg = IC->getSubExprAsWritten(); + if (auto *UO = dyn_cast(Arg)) { + if (UO->getOpcode() == UO_AddrOf) + Arg = UO->getSubExpr(); + } if (auto *DR = dyn_cast(Arg)) Location = DR->getLocation(); else if (auto *M = dyn_cast(Arg)) @@ -617,7 +625,8 @@ class CollectExtraHighlightings if (Location) H.addExtraModifier(*Location, - HighlightingModifier::UsedAsMutableReference); + IsRef ? 
HighlightingModifier::UsedAsMutableReference + : HighlightingModifier::UsedAsMutablePointer); } void @@ -1140,6 +1149,8 @@ llvm::StringRef toSemanticTokenModifier(HighlightingModifier Modifier) { return "defaultLibrary"; case HighlightingModifier::UsedAsMutableReference: return "usedAsMutableReference"; // nonstandard + case HighlightingModifier::UsedAsMutablePointer: + return "usedAsMutablePointer"; // nonstandard case HighlightingModifier::ConstructorOrDestructor: return "constructorOrDestructor"; // nonstandard case HighlightingModifier::FunctionScope: diff --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h index 79ecb344275d1..64ad431909faa 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.h +++ b/clang-tools-extra/clangd/SemanticHighlighting.h @@ -71,6 +71,7 @@ enum class HighlightingModifier { DependentName, DefaultLibrary, UsedAsMutableReference, + UsedAsMutablePointer, ConstructorOrDestructor, FunctionScope, diff --git a/clang-tools-extra/clangd/test/initialize-params.test b/clang-tools-extra/clangd/test/initialize-params.test index eb958cac20279..a2df61ca75235 100644 --- a/clang-tools-extra/clangd/test/initialize-params.test +++ b/clang-tools-extra/clangd/test/initialize-params.test @@ -68,6 +68,7 @@ # CHECK-NEXT: "dependentName", # CHECK-NEXT: "defaultLibrary", # CHECK-NEXT: "usedAsMutableReference", +# CHECK-NEXT: "usedAsMutablePointer", # CHECK-NEXT: "constructorOrDestructor", # CHECK-NEXT: "functionScope", # CHECK-NEXT: "classScope", diff --git a/clang-tools-extra/clangd/test/semantic-tokens.test b/clang-tools-extra/clangd/test/semantic-tokens.test index 5abe78e9a51e1..b3a92b7cc737b 100644 --- a/clang-tools-extra/clangd/test/semantic-tokens.test +++ b/clang-tools-extra/clangd/test/semantic-tokens.test @@ -23,7 +23,7 @@ # CHECK-NEXT: 4, # CHECK-NEXT: 1, # CHECK-NEXT: 0, -# CHECK-NEXT: 32771 +# CHECK-NEXT: 65539 # CHECK-NEXT: ], # CHECK-NEXT: "resultId": "1" # CHECK-NEXT: } @@ -49,7 +49,7 @@ 
# CHECK-NEXT: 4, # CHECK-NEXT: 1, # CHECK-NEXT: 0, -# CHECK-NEXT: 32771 +# CHECK-NEXT: 65539 # CHECK-NEXT: ], # Inserted at position 1 # CHECK-NEXT: "deleteCount": 0, @@ -72,12 +72,12 @@ # CHECK-NEXT: 4, # CHECK-NEXT: 1, # CHECK-NEXT: 0, -# CHECK-NEXT: 32771, +# CHECK-NEXT: 65539, # CHECK-NEXT: 1, # CHECK-NEXT: 4, # CHECK-NEXT: 1, # CHECK-NEXT: 0, -# CHECK-NEXT: 32771 +# CHECK-NEXT: 65539 # CHECK-NEXT: ], # CHECK-NEXT: "resultId": "3" # CHECK-NEXT: } diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 9abc49bb06014..3ea4a58a83a70 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -382,7 +382,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { void $Function_def[[foo]]() { $Class[[F]] $LocalVariable_def[[FF]]; $Class[[G]]<$Class[[F]], &$Class[[F]]::$Method[[f]]> $LocalVariable_def[[GG]]; - $LocalVariable[[GG]].$Method[[foo]](&$LocalVariable[[FF]]); + $LocalVariable[[GG]].$Method[[foo]](&$LocalVariable_usedAsMutablePointer[[FF]]); $Class[[A]]<$Function[[foo]]> $LocalVariable_def[[AA]]; } )cpp", @@ -781,14 +781,14 @@ sizeof...($TemplateParameter[[Elements]]); const int* $LocalVariable_def_readonly[[constPtr]]; int** $LocalVariable_def[[array]]; $Function[[fun]]($LocalVariable[[val]], $LocalVariable[[val]], - $LocalVariable[[ptr]], $LocalVariable_readonly[[constPtr]], + $LocalVariable_usedAsMutablePointer[[ptr]], $LocalVariable_readonly[[constPtr]], $LocalVariable_usedAsMutableReference[[val]], $LocalVariable[[val]], $LocalVariable_usedAsMutableReference[[ptr]], $LocalVariable_readonly_usedAsMutableReference[[constPtr]], $LocalVariable_readonly[[constPtr]], - $LocalVariable[[array]], $LocalVariable_usedAsMutableReference[[array]], + $LocalVariable_usedAsMutablePointer[[array]], $LocalVariable_usedAsMutableReference[[array]], $LocalVariable[[array]] ); 
[](int){}($LocalVariable[[val]]); From 171f7024cc82e8702abebdedb699d37b50574be7 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Mon, 7 Nov 2022 11:56:36 +0000 Subject: [PATCH 407/516] [Assignment Tracking][5/*] Add core infrastructure for instruction reference The Assignment Tracking debug-info feature is outlined in this RFC: https://discourse.llvm.org/t/ rfc-assignment-tracking-a-better-way-of-specifying-variable-locations-in-ir Overview It's possible to find intrinsics linked to an instruction by looking at the MetadataAsValue uses of the attached DIAssignID. That covers instruction -> intrinsic(s) lookup. Add a global DIAssignID -> instruction(s) map which gives us the ability to perform intrinsic -> instruction(s) lookup. Add plumbing to keep the map up to date through optimisations and add utility functions including two that perform those lookups. Finally, add a unittest. Details In llvm/lib/IR/LLVMContextImpl.h add AssignmentIDToInstrs which maps DIAssignID * attachments to Instruction *s. Because the DIAssignID * is the key we can't use a TrackingMDNodeRef for it, and therefore cannot easily update the mapping when a temporary DIAssignID is replaced. Temporary DIAssignIDs are only used in IR parsing to deal with metadata forward references. Update llvm/lib/AsmParser/LLParser.cpp to avoid using temporary DIAssignIDs for attachments. In llvm/lib/IR/Metadata.cpp add Instruction::updateDIAssignIDMapping which is called to remove or add an entry (or both) to AssignmentIDToInstrs. Call this from Instruction::setMetadata and add a call to setMetadata in Instruction's dtor that explicitly unsets the DIAssignID so that the mapping gets updated. In llvm/lib/IR/DebugInfo.cpp and DebugInfo.h add utility functions: getAssignmentInsts(const DbgAssignIntrinsic *DAI) getAssignmentMarkers(const Instruction *Inst) RAUW(DIAssignID *Old, DIAssignID *New) deleteAll(Function *F) These core utils are tested in llvm/unittests/IR/DebugInfoTest.cpp.
Reviewed By: jmorse Differential Revision: https://reviews.llvm.org/D132224 --- llvm/include/llvm/AsmParser/LLParser.h | 6 + llvm/include/llvm/IR/DebugInfo.h | 63 ++++++++- llvm/include/llvm/IR/Instruction.h | 4 + llvm/lib/AsmParser/LLParser.cpp | 19 ++- llvm/lib/IR/DebugInfo.cpp | 60 +++++++++ llvm/lib/IR/Instruction.cpp | 4 + llvm/lib/IR/LLVMContextImpl.h | 5 + llvm/lib/IR/Metadata.cpp | 41 ++++++ llvm/lib/IR/Verifier.cpp | 9 ++ .../parse-and-verify/verify.ll | 8 ++ llvm/unittests/IR/DebugInfoTest.cpp | 126 ++++++++++++++++++ 11 files changed, 342 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 8757543071559..e9813c34ce373 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -108,6 +108,12 @@ namespace llvm { SmallVector InstsWithTBAATag; + /// DIAssignID metadata does not support temporary RAUW so we cannot use + /// the normal metadata forward reference resolution method. Instead, + /// non-temporary DIAssignID are attached to instructions (recorded here) + /// then replaced later. + DenseMap> TempDIAssignIDAttachments; + // Type resolution handling data structures. The location is set when we // have processed a use of the type but not a definition yet. StringMap > NamedTypes; diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index b35d447a7c891..8f49d39f373a5 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -21,7 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/IntrinsicInst.h" namespace llvm { @@ -159,6 +159,67 @@ class DebugInfoFinder { SmallPtrSet NodesSeen; }; +/// Assignment Tracking (at). +namespace at { +// +// Utilities for enumerating storing instructions from an assignment ID. +// +/// A range of instructions. 
+using AssignmentInstRange = + iterator_range::iterator>; +/// Return a range of instructions (typically just one) that have \p ID +/// as an attachment. +/// Iterators invalidated by adding or removing DIAssignID metadata to/from any +/// instruction (including by deleting or cloning instructions). +AssignmentInstRange getAssignmentInsts(DIAssignID *ID); +/// Return a range of instructions (typically just one) that perform the +/// assignment that \p DAI encodes. +/// Iterators invalidated by adding or removing DIAssignID metadata to/from any +/// instruction (including by deleting or cloning instructions). +inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) { + return getAssignmentInsts(cast(DAI->getAssignID())); +} + +// +// Utilities for enumerating llvm.dbg.assign intrinsics from an assignment ID. +// +/// High level: this is an iterator for llvm.dbg.assign intrinsics. +/// Implementation details: this is a wrapper around Value's User iterator that +/// dereferences to a DbgAssignIntrinsic ptr rather than a User ptr. +class DbgAssignIt + : public iterator_adaptor_base::iterator_category, + DbgAssignIntrinsic *, std::ptrdiff_t, + DbgAssignIntrinsic **, + DbgAssignIntrinsic *&> { +public: + DbgAssignIt(Value::user_iterator It) : iterator_adaptor_base(It) {} + DbgAssignIntrinsic *operator*() const { return cast(*I); } +}; +/// A range of llvm.dbg.assign intrinsics. +using AssignmentMarkerRange = iterator_range; +/// Return a range of dbg.assign intrinsics which use \p ID as an operand. +/// Iterators invalidated by deleting an intrinsic contained in this range. +AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID); +/// Return a range of dbg.assign intrinsics for which \p Inst performs the +/// assignment they encode. +/// Iterators invalidated by deleting an intrinsic contained in this range.
+inline AssignmentMarkerRange getAssignmentMarkers(const Instruction *Inst) { + if (auto *ID = Inst->getMetadata(LLVMContext::MD_DIAssignID)) + return getAssignmentMarkers(cast(ID)); + else + return make_range(Value::user_iterator(), Value::user_iterator()); +} + +/// Replace all uses (and attachments) of \p Old with \p New. +void RAUW(DIAssignID *Old, DIAssignID *New); + +/// Remove all Assignment Tracking related intrinsics and metadata from \p F. +void deleteAll(Function *F); + +} // end namespace at + /// Return true if assignment tracking is enabled. bool getEnableAssignmentTracking(); } // end namespace llvm diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index f85fcb93068fa..131a7414a1a7d 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -515,6 +515,10 @@ class Instruction : public User, void getAllMetadataImpl(SmallVectorImpl> &) const; + /// Update the LLVMContext ID-to-Instruction(s) mapping. If \p ID is nullptr + /// then clear the mapping for this instruction. + void updateDIAssignIDMapping(DIAssignID *ID); + public: //===--------------------------------------------------------------------===// // Predicates and helper methods. diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 25204847ca9ce..c1835b3e3023c 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -853,7 +853,18 @@ bool LLParser::parseStandaloneMetadata() { // See if this was forward referenced, if so, handle it. auto FI = ForwardRefMDNodes.find(MetadataID); if (FI != ForwardRefMDNodes.end()) { - FI->second.first->replaceAllUsesWith(Init); + auto *ToReplace = FI->second.first.get(); + // DIAssignID has its own special forward-reference "replacement" for + // attachments (the temporary attachments are never actually attached). 
+ if (isa(Init)) { + for (auto *Inst : TempDIAssignIDAttachments[ToReplace]) { + assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID) && + "Inst unexpectedly already has DIAssignID attachment"); + Inst->setMetadata(LLVMContext::MD_DIAssignID, Init); + } + } + + ToReplace->replaceAllUsesWith(Init); ForwardRefMDNodes.erase(FI); assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work"); @@ -2082,7 +2093,11 @@ bool LLParser::parseInstructionMetadata(Instruction &Inst) { if (parseMetadataAttachment(MDK, N)) return true; - Inst.setMetadata(MDK, N); + if (MDK == LLVMContext::MD_DIAssignID) + TempDIAssignIDAttachments[N].push_back(&Inst); + else + Inst.setMetadata(MDK, N); + if (MDK == LLVMContext::MD_tbaa) InstsWithTBAATag.push_back(&Inst); diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index d30fca63067c0..89b5ff218de15 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm-c/DebugInfo.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -37,6 +38,7 @@ #include using namespace llvm; +using namespace llvm::at; using namespace llvm::dwarf; static cl::opt @@ -1632,3 +1634,61 @@ LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) { return (LLVMMetadataKind)LLVMGenericDINodeMetadataKind; } } + +AssignmentInstRange at::getAssignmentInsts(DIAssignID *ID) { + assert(ID && "Expected non-null ID"); + LLVMContext &Ctx = ID->getContext(); + auto &Map = Ctx.pImpl->AssignmentIDToInstrs; + + auto MapIt = Map.find(ID); + if (MapIt == Map.end()) + return make_range(nullptr, nullptr); + + return make_range(MapIt->second.begin(), MapIt->second.end()); +} + +AssignmentMarkerRange at::getAssignmentMarkers(DIAssignID *ID) { + assert(ID && "Expected non-null ID"); + LLVMContext &Ctx = ID->getContext(); + + auto *IDAsValue = 
MetadataAsValue::getIfExists(Ctx, ID); + + // The ID is only used wrapped in MetadataAsValue(ID), so lets check that + // one of those already exists first. + if (!IDAsValue) + return make_range(Value::user_iterator(), Value::user_iterator()); + + return make_range(IDAsValue->user_begin(), IDAsValue->user_end()); +} + +void at::RAUW(DIAssignID *Old, DIAssignID *New) { + // Replace MetadataAsValue uses. + if (auto *OldIDAsValue = + MetadataAsValue::getIfExists(Old->getContext(), Old)) { + auto *NewIDAsValue = MetadataAsValue::get(Old->getContext(), New); + OldIDAsValue->replaceAllUsesWith(NewIDAsValue); + } + + // Replace attachments. + AssignmentInstRange InstRange = getAssignmentInsts(Old); + // Use intermediate storage for the instruction ptrs because the + // getAssignmentInsts range iterators will be invalidated by adding and + // removing DIAssignID attachments. + SmallVector InstVec(InstRange.begin(), InstRange.end()); + for (auto *I : InstVec) + I->setMetadata(LLVMContext::MD_DIAssignID, New); +} + +void at::deleteAll(Function *F) { + SmallVector ToDelete; + for (BasicBlock &BB : *F) { + for (Instruction &I : BB) { + if (auto *DAI = dyn_cast(&I)) + ToDelete.push_back(DAI); + else + I.setMetadata(LLVMContext::MD_DIAssignID, nullptr); + } + } + for (auto *DAI : ToDelete) + DAI->eraseFromParent(); +} diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 007e518a1a817..74fc3416b564f 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -55,6 +55,10 @@ Instruction::~Instruction() { // instructions in a BasicBlock are deleted). if (isUsedByMetadata()) ValueAsMetadata::handleRAUW(this, UndefValue::get(getType())); + + // Explicitly remove DIAssignID metadata to clear up ID -> Instruction(s) + // mapping in LLVMContext. 
+ setMetadata(LLVMContext::MD_DIAssignID, nullptr); } diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 0b1e5194222fc..3f4f222a0720e 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1499,6 +1499,11 @@ class LLVMContextImpl { /// Collection of metadata used in this context. DenseMap ValueMetadata; + /// Map DIAssignID -> Instructions with that attachment. + /// Managed by Instruction via Instruction::updateDIAssignIDMapping. + /// Query using the at:: functions defined in DebugInfo.h. + DenseMap> AssignmentIDToInstrs; + /// Collection of per-GlobalObject sections used in this context. DenseMap GlobalObjectSections; diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 052f3b1b37ded..5336902031898 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1425,6 +1425,37 @@ void Instruction::dropUnknownNonDebugMetadata(ArrayRef KnownIDs) { } } +void Instruction::updateDIAssignIDMapping(DIAssignID *ID) { + auto &IDToInstrs = getContext().pImpl->AssignmentIDToInstrs; + if (const DIAssignID *CurrentID = + cast_or_null(getMetadata(LLVMContext::MD_DIAssignID))) { + // Nothing to do if the ID isn't changing. + if (ID == CurrentID) + return; + + // Unmap this instruction from its current ID. + auto InstrsIt = IDToInstrs.find(CurrentID); + assert(InstrsIt != IDToInstrs.end() && + "Expect existing attachment to be mapped"); + + auto &InstVec = InstrsIt->second; + auto *InstIt = std::find(InstVec.begin(), InstVec.end(), this); + assert(InstIt != InstVec.end() && + "Expect instruction to be mapped to attachment"); + // The vector contains a ptr to this. If this is the only element in the + // vector, remove the ID:vector entry, otherwise just remove the + // instruction from the vector. + if (InstVec.size() == 1) + IDToInstrs.erase(InstrsIt); + else + InstVec.erase(InstIt); + } + + // Map this instruction to the new ID. 
+ if (ID) + IDToInstrs[ID].push_back(this); +} + void Instruction::setMetadata(unsigned KindID, MDNode *Node) { if (!Node && !hasMetadata()) return; @@ -1435,6 +1466,16 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { return; } + // Update DIAssignID to Instruction(s) mapping. + if (KindID == LLVMContext::MD_DIAssignID) { + // The DIAssignID tracking infrastructure doesn't support RAUWing temporary + // nodes with DIAssignIDs. The cast_or_null below would also catch this, but + // having a dedicated assert helps make this obvious. + assert((!Node || !Node->isTemporary()) && + "Temporary DIAssignIDs are invalid"); + updateDIAssignIDMapping(cast_or_null(Node)); + } + Value::setMetadata(KindID, Node); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 324fa66fb2ea9..5e41fb1261575 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -4548,6 +4549,10 @@ void Verifier::visitDIAssignIDMetadata(Instruction &I, MDNode *MD) { CheckDI(isa(User), "!DIAssignID should only be used by llvm.dbg.assign intrinsics", MD, User); + // All of the dbg.assign intrinsics should be in the same function as I. + if (auto *DAI = dyn_cast(User)) + CheckDI(DAI->getFunction() == I.getFunction(), + "dbg.assign not in same function as inst", DAI, &I); } } } @@ -6008,6 +6013,10 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { CheckDI(isa(DAI->getRawAddressExpression()), "invalid llvm.dbg.assign intrinsic address expression", &DII, DAI->getRawAddressExpression()); + // All of the linked instructions should be in the same function as DII. 
+ for (Instruction *I : at::getAssignmentInsts(DAI)) + CheckDI(DAI->getFunction() == I->getFunction(), + "inst not in same function as dbg.assign", I, DAI); } // Ignore broken !dbg attachments; they're checked elsewhere. diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll index 577289604d536..9fa17e7f06ee1 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll @@ -6,6 +6,13 @@ ;; ;; Checks for this one are inline. +define dso_local void @fun2() !dbg !15 { + ;; DIAssignID copied here from @fun() where it is used by intrinsics. + ; CHECK: dbg.assign not in same function as inst + %x = alloca i32, align 4, !DIAssignID !14 + ret void +} + define dso_local void @fun() !dbg !7 { entry: %a = alloca i32, align 4, !DIAssignID !14 @@ -50,3 +57,4 @@ declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !13 = !DILocation(line: 1, column: 1, scope: !7) !14 = distinct !DIAssignID() +!15 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index 524752168b091..9888bb6dd8e50 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -368,4 +368,130 @@ TEST(DIBuilder, createDbgAddr) { EXPECT_EQ(MDExp->getNumElements(), 0u); } +TEST(AssignmentTrackingTest, Utils) { + // Test the assignment tracking utils defined in DebugInfo.h namespace at {}. + // This includes: + // getAssignmentInsts + // getAssignmentMarkers + // RAUW + // deleteAll + // + // The input IR includes two functions, fun1 and fun2. Both contain an alloca + // with a DIAssignID tag. 
fun1's alloca is linked to two llvm.dbg.assign + // intrinsics, one of which is for an inlined variable and appears before the + // alloca. + + LLVMContext C; + std::unique_ptr M = parseIR(C, R"( + define dso_local void @fun1() !dbg !7 { + entry: + call void @llvm.dbg.assign(metadata i32 undef, metadata !10, metadata !DIExpression(), metadata !12, metadata i32 undef, metadata !DIExpression()), !dbg !13 + %local = alloca i32, align 4, !DIAssignID !12 + call void @llvm.dbg.assign(metadata i32 undef, metadata !16, metadata !DIExpression(), metadata !12, metadata i32 undef, metadata !DIExpression()), !dbg !15 + ret void, !dbg !15 + } + + define dso_local void @fun2() !dbg !17 { + entry: + %local = alloca i32, align 4, !DIAssignID !20 + call void @llvm.dbg.assign(metadata i32 undef, metadata !18, metadata !DIExpression(), metadata !20, metadata i32 undef, metadata !DIExpression()), !dbg !19 + ret void, !dbg !19 + } + + declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + !llvm.ident = !{!6} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: "/") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{!"clang version 14.0.0"} + !7 = distinct !DISubprogram(name: "fun1", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !8 = !DISubroutineType(types: !9) + !9 = !{null} + !10 = !DILocalVariable(name: "local3", scope: !14, file: !1, line: 2, type: !11) + !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !12 = distinct !DIAssignID() + !13 = !DILocation(line: 5, column: 1, scope: !14, inlinedAt: 
!15) + !14 = distinct !DISubprogram(name: "inline", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !15 = !DILocation(line: 3, column: 1, scope: !7) + !16 = !DILocalVariable(name: "local1", scope: !7, file: !1, line: 2, type: !11) + !17 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !18 = !DILocalVariable(name: "local2", scope: !17, file: !1, line: 2, type: !11) + !19 = !DILocation(line: 4, column: 1, scope: !17) + !20 = distinct !DIAssignID() + )"); + + // Check the test IR isn't malformed. + ASSERT_TRUE(M); + + Function &Fun1 = *M->getFunction("fun1"); + Instruction &Alloca = *Fun1.getEntryBlock().getFirstNonPHIOrDbg(); + + // 1. Check the Instruction <-> Intrinsic mappings work in fun1. + // + // Check there are two llvm.dbg.assign intrinsics linked to Alloca. + auto CheckFun1Mapping = [&Alloca]() { + auto Markers = at::getAssignmentMarkers(&Alloca); + EXPECT_TRUE(std::distance(Markers.begin(), Markers.end()) == 2); + // Check those two entries are distinct. + DbgAssignIntrinsic *First = *Markers.begin(); + DbgAssignIntrinsic *Second = *std::next(Markers.begin()); + EXPECT_NE(First, Second); + + // Check that we can get back to Alloca from each llvm.dbg.assign. + for (auto *DAI : Markers) { + auto Insts = at::getAssignmentInsts(DAI); + // Check there is exactly one instruction linked to each intrinsic. Use + // ASSERT_TRUE because we're going to dereference the begin iterator. + ASSERT_TRUE(std::distance(Insts.begin(), Insts.end()) == 1); + EXPECT_FALSE(Insts.empty()); + // Check the linked instruction is Alloca. + Instruction *LinkedInst = *Insts.begin(); + EXPECT_EQ(LinkedInst, &Alloca); + } + }; + CheckFun1Mapping(); + + // 2. Check DIAssignID RAUW replaces attachments and uses. 
+ // + DIAssignID *Old = + cast_or_null(Alloca.getMetadata(LLVMContext::MD_DIAssignID)); + DIAssignID *New = DIAssignID::getDistinct(C); + ASSERT_TRUE(Old && New && New != Old); + at::RAUW(Old, New); + // Check fun1's alloca and intrinsics have been updated and the mapping still + // works. + EXPECT_EQ(New, cast_or_null( + Alloca.getMetadata(LLVMContext::MD_DIAssignID))); + CheckFun1Mapping(); + + // Check that fun2's alloca and intrinsic have not been updated. + Instruction &Fun2Alloca = + *M->getFunction("fun2")->getEntryBlock().getFirstNonPHIOrDbg(); + DIAssignID *Fun2ID = cast_or_null( + Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID)); + EXPECT_NE(New, Fun2ID); + auto Fun2Markers = at::getAssignmentMarkers(&Fun2Alloca); + ASSERT_TRUE(std::distance(Fun2Markers.begin(), Fun2Markers.end()) == 1); + auto Fun2Insts = at::getAssignmentInsts(*Fun2Markers.begin()); + ASSERT_TRUE(std::distance(Fun2Insts.begin(), Fun2Insts.end()) == 1); + EXPECT_EQ(*Fun2Insts.begin(), &Fun2Alloca); + + // 3. Check that deleting works and applies only to the target function. + at::deleteAll(&Fun1); + // There should now only be the alloca and ret in fun1. + EXPECT_EQ(Fun1.begin()->size(), 2); + // fun2's alloca should have the same DIAssignID and remain linked to its + // llvm.dbg.assign. + EXPECT_EQ(Fun2ID, cast_or_null( + Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID))); + EXPECT_FALSE(at::getAssignmentMarkers(&Fun2Alloca).empty()); +} + } // end namespace From 4aabbc0c85b6c188d6cf9b45d548fbfb149dbc62 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Nov 2022 12:27:02 +0000 Subject: [PATCH 408/516] [X86] Flatten WriteShift/Rotate SchedRW defs Some "inner" defs were overriding "outer" SchedRW defs, making it very tricky to track what schedule was being used. 
Noticed as I'm trying to remove a lot of unnecessary shift/rotate RMW overrides from the scheduler models --- llvm/lib/Target/X86/X86InstrShiftRotate.td | 72 ++++++++++++---------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index 823ff78b99035..e57169db7b1d7 100644 --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -14,7 +14,7 @@ let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, cl}", @@ -30,6 +30,7 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (shl GR64:$src1, CL))]>; } // Uses = [CL], SchedRW +let SchedRW = [WriteShift] in { let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", @@ -61,7 +62,8 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", []>; } // hasSideEffects = 0 -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" // FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern // using CL? 
@@ -81,7 +83,7 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t{%cl, $dst|$dst, cl}", [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteShiftLd, WriteRMW] in { def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -118,7 +120,7 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), Requires<[In64BitMode]>; } // SchedRW -let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, cl}", @@ -132,8 +134,9 @@ def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t{%cl, $dst|$dst, cl}", [(set GR64:$dst, (srl GR64:$src1, CL))]>; -} +} // Uses, SchedRW +let SchedRW = [WriteShift] in { def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$src2), "shr{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; @@ -162,7 +165,8 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst", [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in { @@ -181,7 +185,7 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t{%cl, $dst|$dst, cl}", [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteShiftLd, WriteRMW] in { def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -218,7 +222,7 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), Requires<[In64BitMode]>; } // SchedRW -let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { 
+let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, cl}", @@ -234,8 +238,9 @@ def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t{%cl, $dst|$dst, cl}", [(set GR64:$dst, (sra GR64:$src1, CL))]>; -} +} // Uses, SchedRW +let SchedRW = [WriteShift] in { def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "sar{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; @@ -265,7 +270,8 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst", [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in { @@ -284,7 +290,7 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t{%cl, $dst|$dst, cl}", [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteShiftLd, WriteRMW] in { def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -326,7 +332,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), //===----------------------------------------------------------------------===// let hasSideEffects = 0 in { -let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), @@ -337,9 +343,9 @@ def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t{%cl, $dst|$dst, cl}", []>, OpSize32; def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "rcl{q}\t{%cl, $dst|$dst, cl}", []>; -} // Uses = [CL, EFLAGS] +} // 
Uses = [CL, EFLAGS], SchedRW -let Uses = [EFLAGS] in { +let Uses = [EFLAGS], SchedRW = [WriteRotate] in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", []>; def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), @@ -356,7 +362,7 @@ def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "rcl{q}\t$dst", []>; def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -} // Uses = [EFLAGS] +} // Uses = [EFLAGS], SchedRW let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), @@ -367,9 +373,9 @@ def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t{%cl, $dst|$dst, cl}", []>, OpSize32; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "rcr{q}\t{%cl, $dst|$dst, cl}", []>; -} // Uses = [CL, EFLAGS] +} // Uses = [CL, EFLAGS], SchedRW -let Uses = [EFLAGS] in { +let Uses = [EFLAGS], SchedRW = [WriteRotate] in { def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t$dst", []>; def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), @@ -386,12 +392,12 @@ def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "rcr{q}\t$dst", []>; def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -} // Uses = [EFLAGS] +} // Uses = [EFLAGS], SchedRW } // Constraints = "$src = $dst" -let SchedRW = [WriteRotateLd, WriteRMW], mayStore = 1 in { -let Uses = [EFLAGS] in { +let mayStore = 1 in { +let Uses = [EFLAGS], SchedRW = [WriteRotateLd, WriteRMW] in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t$dst", []>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt), @@ -427,7 +433,7 @@ def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt), "rcr{q}\t{$cnt, 
$dst|$dst, $cnt}", []>, Requires<[In64BitMode]>; -} // Uses = [EFLAGS] +} // Uses = [EFLAGS], SchedRW let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCLLd, WriteRMW] in { def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), @@ -449,11 +455,11 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t{%cl, $dst|$dst, cl}", []>, Requires<[In64BitMode]>; -} // Uses = [CL, EFLAGS] -} // SchedRW +} // Uses = [CL, EFLAGS], SchedRW +} // mayStore } // hasSideEffects = 0 -let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in { +let Constraints = "$src1 = $dst" in { // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL], SchedRW = [WriteRotateCL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -468,8 +474,9 @@ def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t{%cl, $dst|$dst, cl}", [(set GR64:$dst, (rotl GR64:$src1, CL))]>; -} +} // Uses, SchedRW +let SchedRW = [WriteRotate] in { def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "rol{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; @@ -499,7 +506,8 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), @@ -515,7 +523,7 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t{%cl, $dst|$dst, cl}", [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteRotateLd, WriteRMW] in { def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, 
u8imm:$src1), @@ -552,7 +560,7 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), Requires<[In64BitMode]>; } // SchedRW -let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteRotateCL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, cl}", @@ -568,6 +576,7 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (rotr GR64:$src1, CL))]>; } +let SchedRW = [WriteRotate] in { def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "ror{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; @@ -597,6 +606,7 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; +} // SchedRW } // Constraints = "$src = $dst", SchedRW let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in { @@ -613,7 +623,7 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t{%cl, $dst|$dst, cl}", [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteRotateLd, WriteRMW] in { def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -688,7 +698,7 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, CL))]>, TB; -} // SchedRW +} // Uses, SchedRW let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other. 
def SHLD16rri8 : Ii8<0xA4, MRMDestReg, @@ -763,7 +773,7 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL), addr:$dst)]>, TB; -} // SchedRW +} // Uses, SchedRW let SchedRW = [WriteSHDmri] in { def SHLD16mri8 : Ii8<0xA4, MRMDestMem, From 028df7fab11bd8c26d8f5689e049186eb8b39092 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Mon, 7 Nov 2022 12:33:23 +0000 Subject: [PATCH 409/516] Fix warning: comparison of integers of different signs Buildbot failure: https://lab.llvm.org/buildbot/#/builders/36/builds/26925 Review & commit: https://reviews.llvm.org/D132224 https://reviews.llvm.org/rG171f7024cc82e8702abebdedb699d37b50574be7 --- llvm/unittests/IR/DebugInfoTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index 9888bb6dd8e50..7cdd3ae2bb849 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -486,7 +486,7 @@ TEST(AssignmentTrackingTest, Utils) { // 3. Check that deleting works and applies only to the target function. at::deleteAll(&Fun1); // There should now only be the alloca and ret in fun1. - EXPECT_EQ(Fun1.begin()->size(), 2); + EXPECT_EQ(Fun1.begin()->size(), 2u); // fun2's alloca should have the same DIAssignID and remain linked to its // llvm.dbg.assign. 
EXPECT_EQ(Fun2ID, cast_or_null( From 6e279f5bb663b8edca53c1195edd11e3502677e1 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 7 Nov 2022 15:45:55 +0300 Subject: [PATCH 410/516] [AMDGPU][MC][GFX10+] Enable literal operands with permlane16/permlanex16 Differential Revision: https://reviews.llvm.org/D137332 --- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 7 ++----- .../CodeGen/AMDGPU/llvm.amdgcn.permlane.ll | 6 ++---- llvm/test/MC/AMDGPU/gfx10_asm_vop3.s | 18 ++++++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 18 ++++++++++++++++++ .../test/MC/Disassembler/AMDGPU/gfx10_vop3.txt | 12 ++++++++++++ 5 files changed, 52 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index bb2b918837c6e..fdbdfe5c47f9e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -660,12 +660,9 @@ let SubtargetPredicate = isGFX11Only in defm : IMAD32_Pats; def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3_OPSEL> { - let Src0RC64 = VRegSrc_32; - let Src1RC64 = SCSrc_b32; - let Src2RC64 = SCSrc_b32; let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, - IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1, - IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2, + IntOpSelMods:$src1_modifiers, SSrc_b32:$src1, + IntOpSelMods:$src2_modifiers, SSrc_b32:$src2, VGPR_32:$vdst_in, op_sel0:$op_sel); let HasClamp = 0; let HasExtVOP3DPP = 0; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll index 862dfe7154fd3..6b233f9a59e5f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -27,9 +27,8 @@ define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src ; GCN-LABEL: {{^}}v_permlane16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 
0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out @@ -124,9 +123,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %sr ; GCN-LABEL: {{^}}v_permlanex16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s index d369973d56dd4..b05bab15e2008 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s @@ -12797,6 +12797,9 @@ v_permlane16_b32 v5, v1, 0.5, s3 v_permlane16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xef,0x0d,0x00] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlane16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x9c,0x01] @@ -12830,6 +12833,12 @@ v_permlane16_b32 v5, v1, s2, 0.5 v_permlane16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xdc,0x03] +v_permlane16_b32 v5, v1, s2, 0xaf123456 
+// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlane16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x77,0xd7,0x01,0x05,0x0c,0x00] @@ -12923,6 +12932,9 @@ v_permlanex16_b32 v5, v1, 0.5, s3 v_permlanex16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xef,0x0d,0x00] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlanex16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x9c,0x01] @@ -12956,6 +12968,12 @@ v_permlanex16_b32 v5, v1, s2, 0.5 v_permlanex16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xdc,0x03] +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlanex16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x78,0xd7,0x01,0x05,0x0c,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 693e12fd01b6d..991ef34807e85 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -5287,6 +5287,15 @@ v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] // GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: 
[0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlanex16_b32 v5, v1, s2, s3 // GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] @@ -5323,6 +5332,15 @@ v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] // GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] // GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt index 66ce6b8b94fab..0785ba2ea2eb6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt @@ -16044,6 +16044,9 @@ # GFX10: v_permlane16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03 @@ -16071,6 +16074,9 @@ # GFX10: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, s3 op_sel:[0,1] ; 
encoding: [0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00 @@ -16149,6 +16155,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03 @@ -16176,6 +16185,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00 From 8f68952183822b63b11f61e5a3c3ade8af33a63a Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Mon, 7 Nov 2022 15:52:55 +0300 Subject: [PATCH 411/516] [AMDGPU][MC][GFX11][NFC] Correct VINTERP src operands Differential Revision: https://reviews.llvm.org/D137238 --- llvm/lib/Target/AMDGPU/VINTERPInstructions.td | 8 ++++ llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s | 42 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td index c63fbbc241d90..71de20223e9f6 100644 --- a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td @@ -63,6 +63,10 @@ def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> { let HasOpSel = 0; let HasModifiers = 1; + let Src0Mod = FPVRegInputMods; + let Src1Mod = 
FPVRegInputMods; + let Src2Mod = FPVRegInputMods; + let Outs64 = (outs VGPR_32:$vdst); let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, Src1Mod:$src1_modifiers, VRegSrc_32:$src1, @@ -77,6 +81,10 @@ class VOP3_VINTERP_F16 ArgVT> : VOPProfile { let HasOpSel = 1; let HasModifiers = 1; + let Src0Mod = FPVRegInputMods; + let Src1Mod = FPVRegInputMods; + let Src2Mod = FPVRegInputMods; + let Outs64 = (outs VGPR_32:$vdst); let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, Src1Mod:$src1_modifiers, VRegSrc_32:$src1, diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s new file mode 100644 index 0000000000000..415f7348c9ee6 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s @@ -0,0 +1,42 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck %s -check-prefix=GFX11-ERR --implicit-check-not=error: --strict-whitespace + +//===----------------------------------------------------------------------===// +// VINTERP src operands must be VGPRs. +// Check that other operand kinds are rejected by assembler. 
+//===----------------------------------------------------------------------===// + +v_interp_p10_f32 v0, s1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f32 v0, v1, s2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f32 v0, v1, v2, s3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f32 v0, 1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f32 v0, v1, 2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f32 v0, v1, v2, 3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, s1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, v1, s2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, v1, v2, s3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, 1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, v1, 2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, v1, v2, 3 +// GFX11-ERR: error: invalid operand for instruction From 671709f0e7d49826fd0908be2c9aed07debf5bc9 Mon Sep 17 00:00:00 2001 From: Daniel Grumberg Date: Fri, 21 Oct 2022 15:34:57 +0100 Subject: [PATCH 412/516] [clang][ExtractAPI] Add targetFallback to relationships in symbol graph Adds a 'targetFallback' field to relationships in symbol graph that contains the plain name of the relationship target. This is useful for clients when the relationship target symbol is not available. 
Differential Revision: https://reviews.llvm.org/D136455 --- .../Serialization/SymbolGraphSerializer.cpp | 1 + .../ExtractAPI/anonymous_record_no_typedef.c | 12 ++++--- clang/test/ExtractAPI/enum.c | 33 ++++++++++++------- clang/test/ExtractAPI/objc_category.m | 12 ++++--- clang/test/ExtractAPI/objc_interface.m | 21 ++++++++---- clang/test/ExtractAPI/objc_property.m | 21 ++++++++---- clang/test/ExtractAPI/objc_protocol.m | 3 +- clang/test/ExtractAPI/struct.c | 12 ++++--- clang/test/ExtractAPI/underscored.c | 3 +- 9 files changed, 79 insertions(+), 39 deletions(-) diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 988ecd2defa9c..641f1ae812a58 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -559,6 +559,7 @@ void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, Object Relationship; Relationship["source"] = Source.USR; Relationship["target"] = Target.USR; + Relationship["targetFallback"] = Target.Name; Relationship["kind"] = getRelationshipString(Kind); Relationships.emplace_back(std::move(Relationship)); diff --git a/clang/test/ExtractAPI/anonymous_record_no_typedef.c b/clang/test/ExtractAPI/anonymous_record_no_typedef.c index e20abfdd86ab4..abb96db058dbf 100644 --- a/clang/test/ExtractAPI/anonymous_record_no_typedef.c +++ b/clang/test/ExtractAPI/anonymous_record_no_typedef.c @@ -56,22 +56,26 @@ struct Vehicle { { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Bicycle", - "target": "c:@S@Vehicle@E@input.h@64" + "target": "c:@S@Vehicle@E@input.h@64", + "targetFallback": "Vehicle::enum (unnamed)" }, { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Car", - "target": "c:@S@Vehicle@E@input.h@64" + "target": "c:@S@Vehicle@E@input.h@64", + "targetFallback": "Vehicle::enum (unnamed)" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@type", - "target": 
"c:@S@Vehicle" + "target": "c:@S@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@information", - "target": "c:@S@Vehicle" + "target": "c:@S@Vehicle", + "targetFallback": "Vehicle" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/enum.c b/clang/test/ExtractAPI/enum.c index 07d848082981f..7b345464cb982 100644 --- a/clang/test/ExtractAPI/enum.c +++ b/clang/test/ExtractAPI/enum.c @@ -65,57 +65,68 @@ enum { { "kind": "memberOf", "source": "c:@E@Vehicle@Bicycle", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Car", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Train", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Ship", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Airplane", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Direction@North", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@East", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@South", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@West", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@Ea@Constant@Constant", - "target": "c:@Ea@Constant" + "target": "c:@Ea@Constant", + "targetFallback": "enum (unnamed)" }, { "kind": "memberOf", "source": "c:@Ea@OtherConstant@OtherConstant", - 
"target": "c:@Ea@OtherConstant" + "target": "c:@Ea@OtherConstant", + "targetFallback": "enum (unnamed)" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_category.m b/clang/test/ExtractAPI/objc_category.m index 56bac43a11cdc..185016dfe848c 100644 --- a/clang/test/ExtractAPI/objc_category.m +++ b/clang/test/ExtractAPI/objc_category.m @@ -54,22 +54,26 @@ + (void)ClassMethod; { "kind": "memberOf", "source": "c:objc(cs)Interface(im)InstanceMethod", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cm)ClassMethod", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)Property", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "conformsTo", "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_interface.m b/clang/test/ExtractAPI/objc_interface.m index 740a215400d9c..159e97a193a13 100644 --- a/clang/test/ExtractAPI/objc_interface.m +++ b/clang/test/ExtractAPI/objc_interface.m @@ -57,37 +57,44 @@ - (char)getIvar; { "kind": "memberOf", "source": "c:objc(cs)Super(cm)getWithProperty:", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(im)setProperty:andOtherThing:", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(py)Property", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "conformsTo", "source": "c:objc(cs)Super", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": 
"memberOf", "source": "c:objc(cs)Derived@Ivar", - "target": "c:objc(cs)Derived" + "target": "c:objc(cs)Derived", + "targetFallback": "Derived" }, { "kind": "memberOf", "source": "c:objc(cs)Derived(im)getIvar", - "target": "c:objc(cs)Derived" + "target": "c:objc(cs)Derived", + "targetFallback": "Derived" }, { "kind": "inheritsFrom", "source": "c:objc(cs)Derived", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_property.m b/clang/test/ExtractAPI/objc_property.m index 1b50950d44243..f09a5ad724238 100644 --- a/clang/test/ExtractAPI/objc_property.m +++ b/clang/test/ExtractAPI/objc_property.m @@ -55,37 +55,44 @@ @interface Interface (Category) { "kind": "memberOf", "source": "c:objc(cs)Interface(cpy)myInterfaceTypeProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myInterfaceInstanceProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cpy)myCategoryTypeProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myCategoryInstanceProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "conformsTo", "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(cpy)myProtocolTypeProp", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(py)myProtocolInstanceProp", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], 
"symbols": [ diff --git a/clang/test/ExtractAPI/objc_protocol.m b/clang/test/ExtractAPI/objc_protocol.m index 036850924587c..d9a65f419df89 100644 --- a/clang/test/ExtractAPI/objc_protocol.m +++ b/clang/test/ExtractAPI/objc_protocol.m @@ -49,7 +49,8 @@ @protocol AnotherProtocol { "kind": "conformsTo", "source": "c:objc(pl)AnotherProtocol", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/struct.c b/clang/test/ExtractAPI/struct.c index 516055768749c..7e93f0d7e7bfa 100644 --- a/clang/test/ExtractAPI/struct.c +++ b/clang/test/ExtractAPI/struct.c @@ -52,22 +52,26 @@ struct Color { { "kind": "memberOf", "source": "c:@S@Color@FI@Red", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Green", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Blue", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Alpha", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/underscored.c b/clang/test/ExtractAPI/underscored.c index 47f1893cdb029..6eeaf1ce412c3 100644 --- a/clang/test/ExtractAPI/underscored.c +++ b/clang/test/ExtractAPI/underscored.c @@ -65,7 +65,8 @@ typedef _HiddenTypedef ExposedTypedefToHidden; { "kind": "memberOf", "source": "c:@S@ExposedRecord@FI@a", - "target": "c:@S@ExposedRecord" + "target": "c:@S@ExposedRecord", + "targetFallback": "ExposedRecord" } ], "symbols": [ From f63db9159bbbb0db98e13cb4440fdaa5c40e219b Mon Sep 17 00:00:00 2001 From: Daniel Grumberg Date: Tue, 25 Oct 2022 11:16:20 +0100 Subject: [PATCH 413/516] Only add targetFallback if target is not in defined in current product --- clang/include/clang/ExtractAPI/API.h | 6 ++++ 
clang/lib/ExtractAPI/API.cpp | 33 +++++++++++++++++++ .../Serialization/SymbolGraphSerializer.cpp | 5 ++- .../ExtractAPI/anonymous_record_no_typedef.c | 12 +++---- clang/test/ExtractAPI/enum.c | 33 +++++++------------ clang/test/ExtractAPI/objc_category.m | 9 ++--- clang/test/ExtractAPI/objc_interface.m | 18 ++++------ clang/test/ExtractAPI/objc_property.m | 21 ++++-------- clang/test/ExtractAPI/objc_protocol.m | 3 +- clang/test/ExtractAPI/struct.c | 12 +++---- clang/test/ExtractAPI/underscored.c | 3 +- 11 files changed, 80 insertions(+), 75 deletions(-) diff --git a/clang/include/clang/ExtractAPI/API.h b/clang/include/clang/ExtractAPI/API.h index b77d76d500df6..ffb700eb923f8 100644 --- a/clang/include/clang/ExtractAPI/API.h +++ b/clang/include/clang/ExtractAPI/API.h @@ -675,6 +675,12 @@ class APISet { const RecordMap &getMacros() const { return Macros; } const RecordMap &getTypedefs() const { return Typedefs; } + /// Get the APIRecord associated with the USR if it's defined in the + /// current product. + /// + /// \returns a APIRecord pointer to the stored symbol record if it exists. + APIRecord *getSymbolForUSR(StringRef USR) const; + /// Generate and store the USR of declaration \p D. /// /// Note: The USR string is stored in and owned by Allocator. diff --git a/clang/lib/ExtractAPI/API.cpp b/clang/lib/ExtractAPI/API.cpp index 8ab03a833e3c2..48322023d5041 100644 --- a/clang/lib/ExtractAPI/API.cpp +++ b/clang/lib/ExtractAPI/API.cpp @@ -197,6 +197,39 @@ TypedefRecord *APISet::addTypedef(StringRef Name, StringRef USR, Comment, Declaration, SubHeading, UnderlyingType); } +template +static APIRecord *getSymbolInRecordMapForUSR(StringRef USR, + const RecordMap &Records) { + auto It = Records.find(USR); + return (It != Records.end() ? 
It->second.get() : nullptr); +} + +APIRecord *APISet::getSymbolForUSR(StringRef USR) const { + if (USR.empty()) + return nullptr; + if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCProtocols)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCInterfaces)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCCategories)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCCategories)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, Structs)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, Enums)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, Typedefs)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, GlobalFunctions)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, GlobalVariables)) + return Record; + if (auto *Record = getSymbolInRecordMapForUSR(USR, Macros)) + return Record; + return nullptr; +} + StringRef APISet::recordUSR(const Decl *D) { SmallString<128> USR; index::generateUSRForDecl(D, USR); diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 641f1ae812a58..807c618e3198f 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -559,7 +559,10 @@ void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, Object Relationship; Relationship["source"] = Source.USR; Relationship["target"] = Target.USR; - Relationship["targetFallback"] = Target.Name; + // Emit a fallback if the target is not a symbol that will be part of this + // symbol graph. 
+ if (API.getSymbolForUSR(Target.USR) == nullptr) + Relationship["targetFallback"] = Target.Name; Relationship["kind"] = getRelationshipString(Kind); Relationships.emplace_back(std::move(Relationship)); diff --git a/clang/test/ExtractAPI/anonymous_record_no_typedef.c b/clang/test/ExtractAPI/anonymous_record_no_typedef.c index abb96db058dbf..e20abfdd86ab4 100644 --- a/clang/test/ExtractAPI/anonymous_record_no_typedef.c +++ b/clang/test/ExtractAPI/anonymous_record_no_typedef.c @@ -56,26 +56,22 @@ struct Vehicle { { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Bicycle", - "target": "c:@S@Vehicle@E@input.h@64", - "targetFallback": "Vehicle::enum (unnamed)" + "target": "c:@S@Vehicle@E@input.h@64" }, { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Car", - "target": "c:@S@Vehicle@E@input.h@64", - "targetFallback": "Vehicle::enum (unnamed)" + "target": "c:@S@Vehicle@E@input.h@64" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@type", - "target": "c:@S@Vehicle", - "targetFallback": "Vehicle" + "target": "c:@S@Vehicle" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@information", - "target": "c:@S@Vehicle", - "targetFallback": "Vehicle" + "target": "c:@S@Vehicle" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/enum.c b/clang/test/ExtractAPI/enum.c index 7b345464cb982..07d848082981f 100644 --- a/clang/test/ExtractAPI/enum.c +++ b/clang/test/ExtractAPI/enum.c @@ -65,68 +65,57 @@ enum { { "kind": "memberOf", "source": "c:@E@Vehicle@Bicycle", - "target": "c:@E@Vehicle", - "targetFallback": "Vehicle" + "target": "c:@E@Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Car", - "target": "c:@E@Vehicle", - "targetFallback": "Vehicle" + "target": "c:@E@Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Train", - "target": "c:@E@Vehicle", - "targetFallback": "Vehicle" + "target": "c:@E@Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Ship", - "target": "c:@E@Vehicle", - "targetFallback": "Vehicle" + "target": "c:@E@Vehicle" }, 
{ "kind": "memberOf", "source": "c:@E@Vehicle@Airplane", - "target": "c:@E@Vehicle", - "targetFallback": "Vehicle" + "target": "c:@E@Vehicle" }, { "kind": "memberOf", "source": "c:@E@Direction@North", - "target": "c:@E@Direction", - "targetFallback": "Direction" + "target": "c:@E@Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@East", - "target": "c:@E@Direction", - "targetFallback": "Direction" + "target": "c:@E@Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@South", - "target": "c:@E@Direction", - "targetFallback": "Direction" + "target": "c:@E@Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@West", - "target": "c:@E@Direction", - "targetFallback": "Direction" + "target": "c:@E@Direction" }, { "kind": "memberOf", "source": "c:@Ea@Constant@Constant", - "target": "c:@Ea@Constant", - "targetFallback": "enum (unnamed)" + "target": "c:@Ea@Constant" }, { "kind": "memberOf", "source": "c:@Ea@OtherConstant@OtherConstant", - "target": "c:@Ea@OtherConstant", - "targetFallback": "enum (unnamed)" + "target": "c:@Ea@OtherConstant" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_category.m b/clang/test/ExtractAPI/objc_category.m index 185016dfe848c..b0bdaaddbdade 100644 --- a/clang/test/ExtractAPI/objc_category.m +++ b/clang/test/ExtractAPI/objc_category.m @@ -54,20 +54,17 @@ + (void)ClassMethod; { "kind": "memberOf", "source": "c:objc(cs)Interface(im)InstanceMethod", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" + "target": "c:objc(cs)Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cm)ClassMethod", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" + "target": "c:objc(cs)Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)Property", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" + "target": "c:objc(cs)Interface" }, { "kind": "conformsTo", diff --git a/clang/test/ExtractAPI/objc_interface.m b/clang/test/ExtractAPI/objc_interface.m 
index 159e97a193a13..908ee37f85bb2 100644 --- a/clang/test/ExtractAPI/objc_interface.m +++ b/clang/test/ExtractAPI/objc_interface.m @@ -57,20 +57,17 @@ - (char)getIvar; { "kind": "memberOf", "source": "c:objc(cs)Super(cm)getWithProperty:", - "target": "c:objc(cs)Super", - "targetFallback": "Super" + "target": "c:objc(cs)Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(im)setProperty:andOtherThing:", - "target": "c:objc(cs)Super", - "targetFallback": "Super" + "target": "c:objc(cs)Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(py)Property", - "target": "c:objc(cs)Super", - "targetFallback": "Super" + "target": "c:objc(cs)Super" }, { "kind": "conformsTo", @@ -81,20 +78,17 @@ - (char)getIvar; { "kind": "memberOf", "source": "c:objc(cs)Derived@Ivar", - "target": "c:objc(cs)Derived", - "targetFallback": "Derived" + "target": "c:objc(cs)Derived" }, { "kind": "memberOf", "source": "c:objc(cs)Derived(im)getIvar", - "target": "c:objc(cs)Derived", - "targetFallback": "Derived" + "target": "c:objc(cs)Derived" }, { "kind": "inheritsFrom", "source": "c:objc(cs)Derived", - "target": "c:objc(cs)Super", - "targetFallback": "Super" + "target": "c:objc(cs)Super" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_property.m b/clang/test/ExtractAPI/objc_property.m index f09a5ad724238..1b50950d44243 100644 --- a/clang/test/ExtractAPI/objc_property.m +++ b/clang/test/ExtractAPI/objc_property.m @@ -55,44 +55,37 @@ @interface Interface (Category) { "kind": "memberOf", "source": "c:objc(cs)Interface(cpy)myInterfaceTypeProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" + "target": "c:objc(cs)Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myInterfaceInstanceProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" + "target": "c:objc(cs)Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cpy)myCategoryTypeProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" + 
"target": "c:objc(cs)Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myCategoryInstanceProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" + "target": "c:objc(cs)Interface" }, { "kind": "conformsTo", "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" + "target": "c:objc(pl)Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(cpy)myProtocolTypeProp", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" + "target": "c:objc(pl)Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(py)myProtocolInstanceProp", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" + "target": "c:objc(pl)Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_protocol.m b/clang/test/ExtractAPI/objc_protocol.m index d9a65f419df89..036850924587c 100644 --- a/clang/test/ExtractAPI/objc_protocol.m +++ b/clang/test/ExtractAPI/objc_protocol.m @@ -49,8 +49,7 @@ @protocol AnotherProtocol { "kind": "conformsTo", "source": "c:objc(pl)AnotherProtocol", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" + "target": "c:objc(pl)Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/struct.c b/clang/test/ExtractAPI/struct.c index 7e93f0d7e7bfa..516055768749c 100644 --- a/clang/test/ExtractAPI/struct.c +++ b/clang/test/ExtractAPI/struct.c @@ -52,26 +52,22 @@ struct Color { { "kind": "memberOf", "source": "c:@S@Color@FI@Red", - "target": "c:@S@Color", - "targetFallback": "Color" + "target": "c:@S@Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Green", - "target": "c:@S@Color", - "targetFallback": "Color" + "target": "c:@S@Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Blue", - "target": "c:@S@Color", - "targetFallback": "Color" + "target": "c:@S@Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Alpha", - "target": "c:@S@Color", - "targetFallback": "Color" + "target": "c:@S@Color" } ], "symbols": [ diff --git 
a/clang/test/ExtractAPI/underscored.c b/clang/test/ExtractAPI/underscored.c index 6eeaf1ce412c3..47f1893cdb029 100644 --- a/clang/test/ExtractAPI/underscored.c +++ b/clang/test/ExtractAPI/underscored.c @@ -65,8 +65,7 @@ typedef _HiddenTypedef ExposedTypedefToHidden; { "kind": "memberOf", "source": "c:@S@ExposedRecord@FI@a", - "target": "c:@S@ExposedRecord", - "targetFallback": "ExposedRecord" + "target": "c:@S@ExposedRecord" } ], "symbols": [ From 39dbfa72aaebe64e913d65f1eeab48c5f33b8010 Mon Sep 17 00:00:00 2001 From: Daniel Grumberg Date: Mon, 7 Nov 2022 13:33:59 +0000 Subject: [PATCH 414/516] Revert "Only add targetFallback if target is not in defined in current product" This was an accidental addition of a non-reviewed change. This reverts commit f63db9159bbbb0db98e13cb4440fdaa5c40e219b. --- clang/include/clang/ExtractAPI/API.h | 6 ---- clang/lib/ExtractAPI/API.cpp | 33 ------------------- .../Serialization/SymbolGraphSerializer.cpp | 5 +-- .../ExtractAPI/anonymous_record_no_typedef.c | 12 ++++--- clang/test/ExtractAPI/enum.c | 33 ++++++++++++------- clang/test/ExtractAPI/objc_category.m | 9 +++-- clang/test/ExtractAPI/objc_interface.m | 18 ++++++---- clang/test/ExtractAPI/objc_property.m | 21 ++++++++---- clang/test/ExtractAPI/objc_protocol.m | 3 +- clang/test/ExtractAPI/struct.c | 12 ++++--- clang/test/ExtractAPI/underscored.c | 3 +- 11 files changed, 75 insertions(+), 80 deletions(-) diff --git a/clang/include/clang/ExtractAPI/API.h b/clang/include/clang/ExtractAPI/API.h index ffb700eb923f8..b77d76d500df6 100644 --- a/clang/include/clang/ExtractAPI/API.h +++ b/clang/include/clang/ExtractAPI/API.h @@ -675,12 +675,6 @@ class APISet { const RecordMap &getMacros() const { return Macros; } const RecordMap &getTypedefs() const { return Typedefs; } - /// Get the APIRecord associated with the USR if it's defined in the - /// current product. - /// - /// \returns a APIRecord pointer to the stored symbol record if it exists. 
- APIRecord *getSymbolForUSR(StringRef USR) const; - /// Generate and store the USR of declaration \p D. /// /// Note: The USR string is stored in and owned by Allocator. diff --git a/clang/lib/ExtractAPI/API.cpp b/clang/lib/ExtractAPI/API.cpp index 48322023d5041..8ab03a833e3c2 100644 --- a/clang/lib/ExtractAPI/API.cpp +++ b/clang/lib/ExtractAPI/API.cpp @@ -197,39 +197,6 @@ TypedefRecord *APISet::addTypedef(StringRef Name, StringRef USR, Comment, Declaration, SubHeading, UnderlyingType); } -template -static APIRecord *getSymbolInRecordMapForUSR(StringRef USR, - const RecordMap &Records) { - auto It = Records.find(USR); - return (It != Records.end() ? It->second.get() : nullptr); -} - -APIRecord *APISet::getSymbolForUSR(StringRef USR) const { - if (USR.empty()) - return nullptr; - if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCProtocols)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCInterfaces)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCCategories)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, ObjCCategories)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, Structs)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, Enums)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, Typedefs)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, GlobalFunctions)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, GlobalVariables)) - return Record; - if (auto *Record = getSymbolInRecordMapForUSR(USR, Macros)) - return Record; - return nullptr; -} - StringRef APISet::recordUSR(const Decl *D) { SmallString<128> USR; index::generateUSRForDecl(D, USR); diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 807c618e3198f..641f1ae812a58 100644 --- 
a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -559,10 +559,7 @@ void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, Object Relationship; Relationship["source"] = Source.USR; Relationship["target"] = Target.USR; - // Emit a fallback if the target is not a symbol that will be part of this - // symbol graph. - if (API.getSymbolForUSR(Target.USR) == nullptr) - Relationship["targetFallback"] = Target.Name; + Relationship["targetFallback"] = Target.Name; Relationship["kind"] = getRelationshipString(Kind); Relationships.emplace_back(std::move(Relationship)); diff --git a/clang/test/ExtractAPI/anonymous_record_no_typedef.c b/clang/test/ExtractAPI/anonymous_record_no_typedef.c index e20abfdd86ab4..abb96db058dbf 100644 --- a/clang/test/ExtractAPI/anonymous_record_no_typedef.c +++ b/clang/test/ExtractAPI/anonymous_record_no_typedef.c @@ -56,22 +56,26 @@ struct Vehicle { { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Bicycle", - "target": "c:@S@Vehicle@E@input.h@64" + "target": "c:@S@Vehicle@E@input.h@64", + "targetFallback": "Vehicle::enum (unnamed)" }, { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Car", - "target": "c:@S@Vehicle@E@input.h@64" + "target": "c:@S@Vehicle@E@input.h@64", + "targetFallback": "Vehicle::enum (unnamed)" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@type", - "target": "c:@S@Vehicle" + "target": "c:@S@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@information", - "target": "c:@S@Vehicle" + "target": "c:@S@Vehicle", + "targetFallback": "Vehicle" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/enum.c b/clang/test/ExtractAPI/enum.c index 07d848082981f..7b345464cb982 100644 --- a/clang/test/ExtractAPI/enum.c +++ b/clang/test/ExtractAPI/enum.c @@ -65,57 +65,68 @@ enum { { "kind": "memberOf", "source": "c:@E@Vehicle@Bicycle", - "target": "c:@E@Vehicle" + "target": 
"c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Car", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Train", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Ship", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Airplane", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Direction@North", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@East", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@South", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@West", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@Ea@Constant@Constant", - "target": "c:@Ea@Constant" + "target": "c:@Ea@Constant", + "targetFallback": "enum (unnamed)" }, { "kind": "memberOf", "source": "c:@Ea@OtherConstant@OtherConstant", - "target": "c:@Ea@OtherConstant" + "target": "c:@Ea@OtherConstant", + "targetFallback": "enum (unnamed)" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_category.m b/clang/test/ExtractAPI/objc_category.m index b0bdaaddbdade..185016dfe848c 100644 --- a/clang/test/ExtractAPI/objc_category.m +++ b/clang/test/ExtractAPI/objc_category.m @@ -54,17 +54,20 @@ + (void)ClassMethod; { "kind": "memberOf", "source": "c:objc(cs)Interface(im)InstanceMethod", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": 
"Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cm)ClassMethod", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)Property", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "conformsTo", diff --git a/clang/test/ExtractAPI/objc_interface.m b/clang/test/ExtractAPI/objc_interface.m index 908ee37f85bb2..159e97a193a13 100644 --- a/clang/test/ExtractAPI/objc_interface.m +++ b/clang/test/ExtractAPI/objc_interface.m @@ -57,17 +57,20 @@ - (char)getIvar; { "kind": "memberOf", "source": "c:objc(cs)Super(cm)getWithProperty:", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(im)setProperty:andOtherThing:", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(py)Property", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "conformsTo", @@ -78,17 +81,20 @@ - (char)getIvar; { "kind": "memberOf", "source": "c:objc(cs)Derived@Ivar", - "target": "c:objc(cs)Derived" + "target": "c:objc(cs)Derived", + "targetFallback": "Derived" }, { "kind": "memberOf", "source": "c:objc(cs)Derived(im)getIvar", - "target": "c:objc(cs)Derived" + "target": "c:objc(cs)Derived", + "targetFallback": "Derived" }, { "kind": "inheritsFrom", "source": "c:objc(cs)Derived", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_property.m b/clang/test/ExtractAPI/objc_property.m index 1b50950d44243..f09a5ad724238 100644 --- a/clang/test/ExtractAPI/objc_property.m +++ b/clang/test/ExtractAPI/objc_property.m @@ -55,37 +55,44 @@ @interface Interface (Category) { "kind": "memberOf", "source": 
"c:objc(cs)Interface(cpy)myInterfaceTypeProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myInterfaceInstanceProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cpy)myCategoryTypeProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myCategoryInstanceProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "conformsTo", "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(cpy)myProtocolTypeProp", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(py)myProtocolInstanceProp", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_protocol.m b/clang/test/ExtractAPI/objc_protocol.m index 036850924587c..d9a65f419df89 100644 --- a/clang/test/ExtractAPI/objc_protocol.m +++ b/clang/test/ExtractAPI/objc_protocol.m @@ -49,7 +49,8 @@ @protocol AnotherProtocol { "kind": "conformsTo", "source": "c:objc(pl)AnotherProtocol", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/struct.c b/clang/test/ExtractAPI/struct.c index 516055768749c..7e93f0d7e7bfa 100644 --- a/clang/test/ExtractAPI/struct.c +++ b/clang/test/ExtractAPI/struct.c @@ -52,22 +52,26 @@ struct Color { { "kind": "memberOf", "source": "c:@S@Color@FI@Red", - "target": "c:@S@Color" + "target": 
"c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Green", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Blue", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Alpha", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/underscored.c b/clang/test/ExtractAPI/underscored.c index 47f1893cdb029..6eeaf1ce412c3 100644 --- a/clang/test/ExtractAPI/underscored.c +++ b/clang/test/ExtractAPI/underscored.c @@ -65,7 +65,8 @@ typedef _HiddenTypedef ExposedTypedefToHidden; { "kind": "memberOf", "source": "c:@S@ExposedRecord@FI@a", - "target": "c:@S@ExposedRecord" + "target": "c:@S@ExposedRecord", + "targetFallback": "ExposedRecord" } ], "symbols": [ From bada35390ad77ea023df7fa027d6de717b79bce2 Mon Sep 17 00:00:00 2001 From: Oleg Shyshkov Date: Mon, 7 Nov 2022 14:40:09 +0100 Subject: [PATCH 415/516] [mlir][NFC] Remove unnecessary attr name getters from StructuredOpsUtils.h. Those methods were added long time ago. Now we get the same methods generated by tablegen, so there is no need for duplicates. 
Differential Revision: https://reviews.llvm.org/D137544 --- .../mlir/Dialect/Utils/StructuredOpsUtils.h | 31 ------------------- .../mlir/Dialect/Vector/IR/VectorOps.td | 4 +-- .../Dialect/Linalg/Transforms/Interchange.cpp | 7 ++--- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 31 +++++++++---------- 4 files changed, 19 insertions(+), 54 deletions(-) diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 5086682ac60ee..6fcfcb1dde2a0 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -48,37 +48,6 @@ bool isColumnMajorMatmul(ArrayAttr indexingMaps); /// the reduction. bool isRowMajorBatchMatmul(ArrayAttr indexingMaps); -/// Attribute name for the AffineArrayAttr which encodes the relationship -/// between a structured op iterators' and its operands. -constexpr StringRef getIndexingMapsAttrName() { return "indexing_maps"; } - -/// Attribute name for the StrArrayAttr which encodes the type of a structured -/// op's iterators. -constexpr StringRef getIteratorTypesAttrName() { return "iterator_types"; } - -/// Attribute name for the StrArrayAttr which encodes the distribution type for -/// `linalg.tiled_loop`. -constexpr StringRef getDistributionTypesAttrName() { - return "distribution_types"; -} - -/// Attribute name for the StringAttr which encodes an optional documentation -/// string of the structured op. -constexpr StringRef getDocAttrName() { return "doc"; } - -/// Attribute name for the StrArrayAttr which encodes the external library -/// function that implements the structured op. -constexpr StringRef getLibraryCallAttrName() { return "library_call"; } - -/// Attribute name for the StrArrayAttr which encodes the value of strides. -constexpr StringRef getStridesAttrName() { return "strides"; } - -/// Attribute name for the StrArrayAttr which encodes the value of dilations. 
-constexpr StringRef getDilationsAttrName() { return "dilations"; } - -/// Attribute name for the StrArrayAttr which encodes the value of paddings. -constexpr StringRef getPaddingAttrName() { return "padding"; } - /// Use to encode that a particular iterator type has parallel semantics. constexpr StringRef getParallelIteratorTypeName() { return "parallel"; } diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index b47c5fa32904e..e952284046b54 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -244,7 +244,7 @@ def Vector_ContractionOp : return getOperand(4).getType().cast(); } Type getResultType() { return getResult().getType(); } - ArrayRef getTraitAttrNames(); + SmallVector getTraitAttrNames(); static unsigned getAccOperandIndex() { return 2; } llvm::SmallVector<::mlir::AffineMap, 4> getIndexingMapsArray() { @@ -265,8 +265,6 @@ def Vector_ContractionOp : std::vector> getContractingDimMap(); std::vector> getBatchDimMap(); - static constexpr StringRef getKindAttrStrName() { return "kind"; } - static CombiningKind getDefaultKind() { return CombiningKind::ADD; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp index a74538767d76a..c6a9989d971fc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp @@ -73,8 +73,8 @@ mlir::linalg::interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, m = m.compose(permutationMap); newIndexingMaps.push_back(m); } - genericOp->setAttr(getIndexingMapsAttrName(), - rewriter.getAffineMapArrayAttr(newIndexingMaps)); + genericOp.setIndexingMapsAttr( + rewriter.getAffineMapArrayAttr(newIndexingMaps)); // 3. Compute the interchanged iterator types. 
ArrayRef itTypes = genericOp.getIteratorTypes().getValue(); @@ -83,8 +83,7 @@ mlir::linalg::interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, SmallVector permutation(interchangeVector.begin(), interchangeVector.end()); applyPermutationToVector(itTypesVector, permutation); - genericOp->setAttr(getIteratorTypesAttrName(), - ArrayAttr::get(context, itTypesVector)); + genericOp.setIteratorTypesAttr(rewriter.getArrayAttr(itTypesVector)); // 4. Transform the index operations by applying the permutation map. if (genericOp.hasIndexSemantics()) { diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 5dd98a1bada41..bd96ee7de24f7 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -508,11 +508,11 @@ void vector::ContractionOp::build(OpBuilder &builder, OperationState &result, ArrayRef iteratorTypes) { result.addOperands({lhs, rhs, acc}); result.addTypes(acc.getType()); - result.addAttribute(::mlir::getIndexingMapsAttrName(), + result.addAttribute(getIndexingMapsAttrName(result.name), builder.getAffineMapArrayAttr( AffineMap::inferFromExprList(indexingExprs))); result.addAttribute( - ::mlir::getIteratorTypesAttrName(), + getIteratorTypesAttrName(result.name), builder.getArrayAttr(llvm::to_vector(llvm::map_range( iteratorTypes, [&](IteratorType t) -> mlir::Attribute { return IteratorTypeAttr::get(builder.getContext(), t); @@ -533,9 +533,9 @@ void vector::ContractionOp::build(OpBuilder &builder, OperationState &result, ArrayAttr iteratorTypes, CombiningKind kind) { result.addOperands({lhs, rhs, acc}); result.addTypes(acc.getType()); - result.addAttribute(::mlir::getIndexingMapsAttrName(), indexingMaps); - result.addAttribute(::mlir::getIteratorTypesAttrName(), iteratorTypes); - result.addAttribute(ContractionOp::getKindAttrStrName(), + result.addAttribute(getIndexingMapsAttrName(result.name), indexingMaps); + 
result.addAttribute(getIteratorTypesAttrName(result.name), iteratorTypes); + result.addAttribute(getKindAttrName(result.name), CombiningKindAttr::get(builder.getContext(), kind)); } @@ -570,7 +570,8 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) { // represented as an array of strings. // TODO: Remove this conversion once tests are fixed. ArrayAttr iteratorTypes = - result.attributes.get("iterator_types").cast(); + result.attributes.get(getIteratorTypesAttrName(result.name)) + .cast(); SmallVector iteratorTypeAttrs; @@ -579,15 +580,15 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) { if (!maybeIteratorType.has_value()) return parser.emitError(loc) << "unexpected iterator_type (" << s << ")"; - iteratorTypeAttrs.push_back(IteratorTypeAttr::get( - parser.getContext(), maybeIteratorType.value())); + iteratorTypeAttrs.push_back( + IteratorTypeAttr::get(parser.getContext(), maybeIteratorType.value())); } - result.attributes.set("iterator_types", + result.attributes.set(getIteratorTypesAttrName(result.name), parser.getBuilder().getArrayAttr(iteratorTypeAttrs)); - if (!result.attributes.get(ContractionOp::getKindAttrStrName())) { + if (!result.attributes.get(getKindAttrName(result.name))) { result.addAttribute( - ContractionOp::getKindAttrStrName(), + getKindAttrName(result.name), CombiningKindAttr::get(result.getContext(), ContractionOp::getDefaultKind())); } @@ -822,11 +823,9 @@ LogicalResult ContractionOp::verify() { return success(); } -ArrayRef ContractionOp::getTraitAttrNames() { - static constexpr StringRef names[3] = {::mlir::getIndexingMapsAttrName(), - ::mlir::getIteratorTypesAttrName(), - ContractionOp::getKindAttrStrName()}; - return llvm::makeArrayRef(names); +SmallVector ContractionOp::getTraitAttrNames() { + return SmallVector{getIndexingMapsAttrName(), + getIteratorTypesAttrName(), getKindAttrName()}; } static int64_t getResultIndex(AffineMap map, AffineExpr targetExpr) { From 
9a45e4beede24fdcd6b7d95416d0447ba154651c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 13 Oct 2022 16:45:37 +0200 Subject: [PATCH 416/516] [MemCpyOpt] Move lifetime marker before call to enable call slot optimization Currently call slot optimization may be prevented because the lifetime markers for the destination only start after the call. In this case, rather than aborting the transform, we should move the lifetime.start before the call to enable the transform. Differential Revision: https://reviews.llvm.org/D135886 --- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 28 +++++++++++++++---- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 3 +- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 387b71da43737..43259cb42da18 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -331,16 +331,27 @@ void MemCpyOptPass::eraseInstruction(Instruction *I) { } // Check for mod or ref of Loc between Start and End, excluding both boundaries. -// Start and End must be in the same block +// Start and End must be in the same block. +// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start +// intrinsic and store it inside SkippedLifetimeStart. 
static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, - const MemoryUseOrDef *End) { + const MemoryUseOrDef *End, + Instruction **SkippedLifetimeStart = nullptr) { assert(Start->getBlock() == End->getBlock() && "Only local supported"); for (const MemoryAccess &MA : make_range(++Start->getIterator(), End->getIterator())) { - if (isModOrRefSet(AA.getModRefInfo(cast(MA).getMemoryInst(), - Loc))) + Instruction *I = cast(MA).getMemoryInst(); + if (isModOrRefSet(AA.getModRefInfo(I, Loc))) { + auto *II = dyn_cast(I); + if (II && II->getIntrinsicID() == Intrinsic::lifetime_start && + SkippedLifetimeStart && !*SkippedLifetimeStart) { + *SkippedLifetimeStart = I; + continue; + } + return true; + } } return false; } @@ -913,8 +924,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Check that nothing touches the dest of the copy between // the call and the store/memcpy. + Instruction *SkippedLifetimeStart = nullptr; if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(cpyStore))) { + MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) { LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n"); return false; } @@ -1094,6 +1106,12 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, cast(cpyDest)->setAlignment(srcAlign); } + if (SkippedLifetimeStart) { + SkippedLifetimeStart->moveBefore(C); + MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart), + MSSA->getMemoryAccess(C)); + } + // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be // handled here, but combineMetadata doesn't support them yet diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 77b94a7135d01..58e4ab8950553 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -55,9 +55,8 @@ define i32 @call_slot_move_lifetime_start() { ; CHECK-LABEL: 
@call_slot_move_lifetime_start( ; CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @call(ptr [[TMP]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DST]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[TMP]], i64 4, i1 false) +; CHECK-NEXT: call void @call(ptr [[DST]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DST]]) ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[DST]], align 4 ; CHECK-NEXT: ret i32 [[V]] From 12a6572d41f195a5765b49322b46b246c30759fc Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 3 Nov 2022 11:25:41 +0000 Subject: [PATCH 417/516] [AArch64] Add SME2.1 target feature for Armv9-A 2022 Architecture Extension First patch in a series adding MC layer support for SME2.1. This patch adds the following feature: sme2p1 The reference can be found here: https://developer.arm.com/documentation/ddi0602/2022-09 Differential Revision: https://reviews.llvm.org/D137410 --- llvm/include/llvm/Support/AArch64TargetParser.def | 1 + llvm/include/llvm/Support/AArch64TargetParser.h | 1 + llvm/lib/Target/AArch64/AArch64.td | 3 +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 9 +++++++-- llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 1 + llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s | 7 +++++++ llvm/test/MC/AArch64/SME2p1/directive-arch.s | 8 ++++++++ .../AArch64/SME2p1/directive-arch_extension-negative.s | 7 +++++++ llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s | 5 +++++ llvm/test/MC/AArch64/SVE2p1/bfmlslb.s | 2 +- llvm/test/MC/AArch64/SVE2p1/bfmlslt.s | 2 +- llvm/test/MC/AArch64/SVE2p1/cntp.s | 2 +- llvm/test/MC/AArch64/SVE2p1/fclamp.s | 2 +- llvm/test/MC/AArch64/SVE2p1/fdot.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ld1b.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ld1d.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ld1h.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ld1q.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ld1w.s | 2 +- 
llvm/test/MC/AArch64/SVE2p1/ldnt1b.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ldnt1d.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ldnt1h.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ldnt1w.s | 2 +- llvm/test/MC/AArch64/SVE2p1/pext.s | 2 +- llvm/test/MC/AArch64/SVE2p1/ptrue.s | 2 +- llvm/test/MC/AArch64/SVE2p1/sdot.s | 2 +- llvm/test/MC/AArch64/SVE2p1/sqcvtn.s | 2 +- llvm/test/MC/AArch64/SVE2p1/sqcvtun.s | 2 +- llvm/test/MC/AArch64/SVE2p1/sqrshrn.s | 2 +- llvm/test/MC/AArch64/SVE2p1/sqrshrun.s | 2 +- llvm/test/MC/AArch64/SVE2p1/st1b.s | 2 +- llvm/test/MC/AArch64/SVE2p1/st1d.s | 2 +- llvm/test/MC/AArch64/SVE2p1/st1h.s | 2 +- llvm/test/MC/AArch64/SVE2p1/st1q.s | 2 +- llvm/test/MC/AArch64/SVE2p1/st1w.s | 2 +- llvm/test/MC/AArch64/SVE2p1/stnt1b.s | 2 +- llvm/test/MC/AArch64/SVE2p1/stnt1d.s | 2 +- llvm/test/MC/AArch64/SVE2p1/stnt1h.s | 2 +- llvm/test/MC/AArch64/SVE2p1/stnt1w.s | 2 +- llvm/test/MC/AArch64/SVE2p1/udot.s | 2 +- llvm/test/MC/AArch64/SVE2p1/uqcvtn.s | 2 +- llvm/test/MC/AArch64/SVE2p1/uqrshrn.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilege.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilegt.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilehi.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilehs.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilele.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilelo.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilels.s | 2 +- llvm/test/MC/AArch64/SVE2p1/whilelt.s | 2 +- llvm/unittests/Support/TargetParserTest.cpp | 4 +++- 51 files changed, 84 insertions(+), 44 deletions(-) create mode 100644 llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s create mode 100644 llvm/test/MC/AArch64/SME2p1/directive-arch.s create mode 100644 llvm/test/MC/AArch64/SME2p1/directive-arch_extension-negative.s create mode 100644 llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 2f83d0656c4dc..d1366f81cb5b3 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ 
b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -148,6 +148,7 @@ AARCH64_ARCH_EXT_NAME("sme", AArch64::AEK_SME, "+sme", AARCH64_ARCH_EXT_NAME("sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64", "-sme-f64f64") AARCH64_ARCH_EXT_NAME("sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64", "-sme-i16i64") AARCH64_ARCH_EXT_NAME("sme2", AArch64::AEK_SME2, "+sme2", "-sme2") +AARCH64_ARCH_EXT_NAME("sme2p1", AArch64::AEK_SME2p1, "+sme2p1", "-sme2p1") AARCH64_ARCH_EXT_NAME("hbc", AArch64::AEK_HBC, "+hbc", "-hbc") AARCH64_ARCH_EXT_NAME("mops", AArch64::AEK_MOPS, "+mops", "-mops") AARCH64_ARCH_EXT_NAME("pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon") diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index 35b28413a88f7..e648ca8aceae0 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -74,6 +74,7 @@ enum ArchExtKind : uint64_t { AEK_PERFMON = 1ULL << 42, // FEAT_PMUv3 AEK_SME2 = 1ULL << 43, // FEAT_SME2 AEK_SVE2p1 = 1ULL << 44, // FEAT_SVE2p1 + AEK_SME2p1 = 1ULL << 45, // FEAT_SME2p1 }; enum class ArchKind { diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 48c4b78b65975..981e213186bf8 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -479,6 +479,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true", "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>; +def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true", + "Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>; + def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", "Apple A7 (the CPU formerly known as Cyclone)">; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 
137e8ac917f04..0439de8c1c190 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -146,6 +146,8 @@ def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">, AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">; def HasSME2 : Predicate<"Subtarget->hasSME2()">, AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">; +def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">, + AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. def HasSVEorSME @@ -158,10 +160,13 @@ def HasSVE2orSME "sve2 or sme">; def HasSVE2p1_or_HasSME : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">, - AssemblerPredicate<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; + AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; def HasSVE2p1_or_HasSME2 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">, - AssemblerPredicate<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">; + AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">; +def HasSVE2p1_or_HasSME2p1 + : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">, + AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">; // A subset of NEON instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. 
def HasNEONorSME diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index e04c054205050..e4b2c09ec8d3a 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3485,6 +3485,7 @@ static const struct Extension { {"sme-f64f64", {AArch64::FeatureSMEF64F64}}, {"sme-i16i64", {AArch64::FeatureSMEI16I64}}, {"sme2", {AArch64::FeatureSME2}}, + {"sme2p1", {AArch64::FeatureSME2p1}}, {"hbc", {AArch64::FeatureHBC}}, {"mops", {AArch64::FeatureMOPS}}, // FIXME: Unsupported extensions diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s b/llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s new file mode 100644 index 0000000000000..948d8f996c156 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s @@ -0,0 +1,7 @@ +// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s + +.arch armv9-a+sme2p1 +.arch armv9-a+nosme2p1 +sqcvt z0.h, {z0.s, z1.s} +// CHECK: error: instruction requires: sme2 +// CHECK: sqcvt z0.h, {z0.s, z1.s} diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch.s b/llvm/test/MC/AArch64/SME2p1/directive-arch.s new file mode 100644 index 0000000000000..112de2530ca8b --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s + +// SME2p1 should imply SME2 +.arch armv9-a+sme2p1 +sqcvt z0.h, {z0.s, z1.s} +// CHECK: sqcvt z0.h, { z0.s, z1.s } + +.arch armv9-a+nosme2p1 diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension-negative.s new file mode 100644 index 0000000000000..19f11b88173ff --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension-negative.s @@ -0,0 +1,7 @@ +// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s + +.arch_extension sme2p1 +.arch_extension nosme2 
+sqcvt z0.h, { z0.s, z1.s } +// CHECK: error: instruction requires: sme2 +// CHECK: sqcvt z0.h diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s new file mode 100644 index 0000000000000..653956d733450 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s + +.arch_extension sme2p1 +sqcvt z0.h, { z0.s, z1.s } +// CHECK: sqcvt z0.h, { z0.s, z1.s } diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s b/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s index b1109838b88b4..127b54cb240e4 100644 --- a/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s +++ b/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s b/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s index bbb8b15964797..28fa3759771bb 100644 --- a/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s +++ b/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/cntp.s b/llvm/test/MC/AArch64/SVE2p1/cntp.s index 817c5f05dddb0..6f6c6a0bc612a 100644 --- a/llvm/test/MC/AArch64/SVE2p1/cntp.s +++ b/llvm/test/MC/AArch64/SVE2p1/cntp.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/fclamp.s b/llvm/test/MC/AArch64/SVE2p1/fclamp.s index bd52cba1d31fc..8512d6077699d 100644 --- a/llvm/test/MC/AArch64/SVE2p1/fclamp.s +++ b/llvm/test/MC/AArch64/SVE2p1/fclamp.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/fdot.s b/llvm/test/MC/AArch64/SVE2p1/fdot.s index 3677adc444fe2..9005e1f7f0eb2 100644 --- a/llvm/test/MC/AArch64/SVE2p1/fdot.s +++ 
b/llvm/test/MC/AArch64/SVE2p1/fdot.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1b.s b/llvm/test/MC/AArch64/SVE2p1/ld1b.s index 2692e1684af11..7e4b9ff9ee7d7 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d.s b/llvm/test/MC/AArch64/SVE2p1/ld1d.s index 2edfea2a72b8e..387e10ee283a8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | 
llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1h.s b/llvm/test/MC/AArch64/SVE2p1/ld1h.s index 0146fb11caa08..833c940af5ef8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1q.s b/llvm/test/MC/AArch64/SVE2p1/ld1q.s index dc6e904d0fa0a..2adc657497d58 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1q.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1q.s @@ -5,7 +5,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w.s 
b/llvm/test/MC/AArch64/SVE2p1/ld1w.s index 2849ee106ba00..177b0a88be86b 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1w.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s index 4389f84bba6a9..e582b94730e60 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s index b459309405b5a..7ee3a20c1209d 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s 
--check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s index 7027efeb2442c..acaca8f4d4f98 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s index 662d2c54ec3ab..fffc1e81750f8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: 
| sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/pext.s b/llvm/test/MC/AArch64/SVE2p1/pext.s index cefade005a6ca..86a79316a1e6c 100644 --- a/llvm/test/MC/AArch64/SVE2p1/pext.s +++ b/llvm/test/MC/AArch64/SVE2p1/pext.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ptrue.s b/llvm/test/MC/AArch64/SVE2p1/ptrue.s index ae2a3456ff3cb..c499460ea9a13 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ptrue.s +++ b/llvm/test/MC/AArch64/SVE2p1/ptrue.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sdot.s b/llvm/test/MC/AArch64/SVE2p1/sdot.s index 3f5517ae7f26a..13d4e2d08cf61 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sdot.s +++ b/llvm/test/MC/AArch64/SVE2p1/sdot.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s b/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s index 5bb66364d4907..b50e2ff47afad 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s b/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s index 573da67149986..b17e6a4757788 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s b/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s index 1ee4e7aa75da9..4ae7bd32b1b69 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s b/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s index ba47e02e297ff..c88b430b18e92 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1b.s b/llvm/test/MC/AArch64/SVE2p1/st1b.s index eeadf4799fe90..9293d1fbe272c 100644 --- 
a/llvm/test/MC/AArch64/SVE2p1/st1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d.s b/llvm/test/MC/AArch64/SVE2p1/st1d.s index 4b9451e92b85b..367bc7d43e6b1 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1h.s b/llvm/test/MC/AArch64/SVE2p1/st1h.s index 2f855cb4688ff..d81900856197a 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump 
-d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1q.s b/llvm/test/MC/AArch64/SVE2p1/st1q.s index 313e7d7e0f17e..5bf1892339699 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1q.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1q.s @@ -5,7 +5,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w.s b/llvm/test/MC/AArch64/SVE2p1/st1w.s index 33703969bc023..374b03da30c2f 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1w.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git 
a/llvm/test/MC/AArch64/SVE2p1/stnt1b.s b/llvm/test/MC/AArch64/SVE2p1/stnt1b.s index 7b03e20279a51..7f0ec3160d99d 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1d.s b/llvm/test/MC/AArch64/SVE2p1/stnt1d.s index 6b0215d943665..af87f2c388afb 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1h.s b/llvm/test/MC/AArch64/SVE2p1/stnt1h.s index 954494c6cc330..433a3fdeea9b3 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 
--no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1w.s b/llvm/test/MC/AArch64/SVE2p1/stnt1w.s index fba4873f8c720..f9836869eb6c8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1w.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/udot.s b/llvm/test/MC/AArch64/SVE2p1/udot.s index c88cc631543a1..2c3628b5dbc56 100644 --- a/llvm/test/MC/AArch64/SVE2p1/udot.s +++ b/llvm/test/MC/AArch64/SVE2p1/udot.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ 
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s b/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s index 68be7a1670703..701fee430dc65 100644 --- a/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s +++ b/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s b/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s index b68223edc5169..e8d2e1239bbda 100644 --- a/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s +++ b/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilege.s b/llvm/test/MC/AArch64/SVE2p1/whilege.s index f1acef11f69c6..1b6f09087ef23 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilege.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilege.s 
@@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilegt.s b/llvm/test/MC/AArch64/SVE2p1/whilegt.s index 69e3bc93c0c7a..e7c2badb443d4 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilegt.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilegt.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilehi.s b/llvm/test/MC/AArch64/SVE2p1/whilehi.s index ddb7a6829c920..2c7b7f75be7b4 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilehi.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilehi.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | 
FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilehs.s b/llvm/test/MC/AArch64/SVE2p1/whilehs.s index 0a4c50da6d4c1..f199c561d4c56 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilehs.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilehs.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilele.s b/llvm/test/MC/AArch64/SVE2p1/whilele.s index f1824ecc8a411..44e8aab57c7c6 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilele.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilele.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilelo.s b/llvm/test/MC/AArch64/SVE2p1/whilelo.s index 5be4b66fc8c09..9d4a8442fdfc5 
100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilelo.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilelo.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilels.s b/llvm/test/MC/AArch64/SVE2p1/whilels.s index 89d98517cd498..9412373a8580f 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilels.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilels.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilelt.s b/llvm/test/MC/AArch64/SVE2p1/whilelt.s index dd0d3b77f00ae..d0d02d0fc351a 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilelt.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilelt.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | 
FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 951d4f371562a..904a7316c5046 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -1519,7 +1519,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_BRBE, AArch64::AEK_PAUTH, AArch64::AEK_FLAGM, AArch64::AEK_SME, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64, AArch64::AEK_SME2, AArch64::AEK_HBC, AArch64::AEK_MOPS, - AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1}; + AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1, AArch64::AEK_SME2p1}; std::vector Features; @@ -1578,6 +1578,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sme-f64f64")); EXPECT_TRUE(llvm::is_contained(Features, "+sme-i16i64")); EXPECT_TRUE(llvm::is_contained(Features, "+sme2")); + EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1")); EXPECT_TRUE(llvm::is_contained(Features, "+hbc")); EXPECT_TRUE(llvm::is_contained(Features, "+mops")); EXPECT_TRUE(llvm::is_contained(Features, "+perfmon")); @@ -1658,6 +1659,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"}, {"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"}, {"sme2", "nosme2", "+sme2", "-sme2"}, + {"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"}, {"hbc", "nohbc", "+hbc", "-hbc"}, {"mops", "nomops", "+mops", "-mops"}, {"pmuv3", "nopmuv3", "+perfmon", "-perfmon"}, From 4c44fa1c3829c2d0c6ce10b576dafbc2e0631d47 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Mon, 7 Nov 2022 12:47:11 +0000 Subject: [PATCH 418/516] [Assignment Tracking][5.1/*] 
Add deleteAssignmentMarkers function deleteAssignmentMarkers(const Instruction *Inst) does exactly as you'd expect - it deletes any dbg.assign intrinsics linked to Inst. Reviewed By: jmorse Differential Revision: https://reviews.llvm.org/D133576 --- llvm/include/llvm/IR/DebugInfo.h | 3 +++ llvm/lib/IR/DebugInfo.cpp | 9 +++++++++ llvm/unittests/IR/DebugInfoTest.cpp | 22 +++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 8f49d39f373a5..3987f5bbbd8eb 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -212,6 +212,9 @@ inline AssignmentMarkerRange getAssignmentMarkers(const Instruction *Inst) { return make_range(Value::user_iterator(), Value::user_iterator()); } +/// Delete the llvm.dbg.assign intrinsics linked to \p Inst. +void deleteAssignmentMarkers(const Instruction *Inst); + /// Replace all uses (and attachments) of \p Old with \p New. void RAUW(DIAssignID *Old, DIAssignID *New); diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 89b5ff218de15..a051a1bbb604c 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1661,6 +1661,15 @@ AssignmentMarkerRange at::getAssignmentMarkers(DIAssignID *ID) { return make_range(IDAsValue->user_begin(), IDAsValue->user_end()); } +void at::deleteAssignmentMarkers(const Instruction *Inst) { + auto Range = getAssignmentMarkers(Inst); + if (Range.empty()) + return; + SmallVector ToDelete(Range.begin(), Range.end()); + for (auto *DAI : ToDelete) + DAI->eraseFromParent(); +} + void at::RAUW(DIAssignID *Old, DIAssignID *New) { // Replace MetadataAsValue uses. 
if (auto *OldIDAsValue = diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index 7cdd3ae2bb849..e58b4f562e591 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -398,6 +398,13 @@ TEST(AssignmentTrackingTest, Utils) { ret void, !dbg !19 } + define dso_local void @fun3() !dbg !21 { + entry: + %local = alloca i32, align 4, !DIAssignID !24 + call void @llvm.dbg.assign(metadata i32 undef, metadata !22, metadata !DIExpression(), metadata !24, metadata i32* undef, metadata !DIExpression()), !dbg !23 + ret void + } + declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) !llvm.dbg.cu = !{!0} @@ -425,6 +432,10 @@ TEST(AssignmentTrackingTest, Utils) { !18 = !DILocalVariable(name: "local2", scope: !17, file: !1, line: 2, type: !11) !19 = !DILocation(line: 4, column: 1, scope: !17) !20 = distinct !DIAssignID() + !21 = distinct !DISubprogram(name: "fun3", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !22 = !DILocalVariable(name: "local4", scope: !21, file: !1, line: 2, type: !11) + !23 = !DILocation(line: 4, column: 1, scope: !21) + !24 = distinct !DIAssignID() )"); // Check the test IR isn't malformed. @@ -483,7 +494,16 @@ TEST(AssignmentTrackingTest, Utils) { ASSERT_TRUE(std::distance(Fun2Insts.begin(), Fun2Insts.end()) == 1); EXPECT_EQ(*Fun2Insts.begin(), &Fun2Alloca); - // 3. Check that deleting works and applies only to the target function. + // 3. Check that deleting dbg.assigns from a specific instruction works. + Instruction &Fun3Alloca = + *M->getFunction("fun3")->getEntryBlock().getFirstNonPHIOrDbg(); + auto Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); + ASSERT_TRUE(std::distance(Fun3Markers.begin(), Fun3Markers.end()) == 1); + at::deleteAssignmentMarkers(&Fun3Alloca); + Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); + EXPECT_EQ(Fun3Markers.empty(), true); + + // 4. 
Check that deleting works and applies only to the target function. at::deleteAll(&Fun1); // There should now only be the alloca and ret in fun1. EXPECT_EQ(Fun1.begin()->size(), 2u); From 8ddd1ccdf89317be1c40fa9183e214878a56151e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 16 Sep 2022 13:57:04 -0700 Subject: [PATCH 419/516] [SLP]Redesign vectorization of the gather nodes. Gather nodes are vectorized as simply vector of the scalars instead of relying on the actual node. It leads to the fact that in some cases we may miss incorrect transformation (non-matching set of scalars is just ended as a gather node instead of possible vector/gather node). Better to rely on the actual nodes, it allows to improve stability and better detect missed cases. Differential Revision: https://reviews.llvm.org/D135174 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 336 +++++++++++------- .../SLPVectorizer/AArch64/matmul.ll | 66 ++-- .../SLPVectorizer/AArch64/slp-fma-loss.ll | 140 ++++---- .../SLPVectorizer/AArch64/splat-loads.ll | 64 ++-- .../SLPVectorizer/AArch64/tsc-s116.ll | 29 +- .../vectorizable-selects-uniform-cmps.ll | 16 +- .../vectorize-free-extracts-inserts.ll | 46 +-- .../SLPVectorizer/AMDGPU/packed-math.ll | 8 +- .../Transforms/SLPVectorizer/X86/PR35777.ll | 22 +- .../Transforms/SLPVectorizer/X86/PR39774.ll | 18 +- .../X86/alternate-cmp-swapped-pred.ll | 4 +- .../SLPVectorizer/X86/broadcast_long.ll | 5 +- .../SLPVectorizer/X86/buildvector-shuffle.ll | 6 +- .../Transforms/SLPVectorizer/X86/c-ray.ll | 20 +- .../Transforms/SLPVectorizer/X86/cmp_sel.ll | 8 +- .../SLPVectorizer/X86/commutativity.ll | 6 +- .../SLPVectorizer/X86/compare-reduce.ll | 22 +- .../SLPVectorizer/X86/crash_cmpop.ll | 40 +-- .../X86/crash_exceed_scheduling.ll | 34 +- llvm/test/Transforms/SLPVectorizer/X86/cse.ll | 36 +- .../X86/extract-scalar-from-undef.ll | 9 +- .../SLPVectorizer/X86/extract_in_tree_user.ll | 30 +- .../X86/extractelement-multiple-uses.ll | 10 +- 
.../SLPVectorizer/X86/extractelement.ll | 20 +- .../SLPVectorizer/X86/horizontal-list.ll | 20 +- .../SLPVectorizer/X86/in-tree-user.ll | 24 +- .../SLPVectorizer/X86/insert-shuffle.ll | 7 +- .../X86/jumbled-load-multiuse.ll | 9 +- .../Transforms/SLPVectorizer/X86/lookahead.ll | 84 ++--- .../X86/matched-shuffled-entries.ll | 24 +- .../SLPVectorizer/X86/ordering-bug.ll | 22 +- .../Transforms/SLPVectorizer/X86/partail.ll | 35 +- .../SLPVectorizer/X86/phi-undef-input.ll | 12 +- .../SLPVectorizer/X86/reduction2.ll | 32 +- .../X86/remark_extract_broadcast.ll | 2 +- .../SLPVectorizer/X86/reorder_phi.ll | 36 +- .../X86/reorder_with_external_users.ll | 68 ++-- .../SLPVectorizer/X86/reused-undefs.ll | 4 +- .../X86/scatter-vectorize-reused-pointer.ll | 16 +- .../X86/vectorize-widest-phis.ll | 2 +- 40 files changed, 733 insertions(+), 659 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ba44d4a77ca3a..53d5f67caff40 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2267,13 +2267,14 @@ class BoUpSLP { /// Vectorize a single entry in the tree. Value *vectorizeTree(TreeEntry *E); - /// Vectorize a single entry in the tree, starting in \p VL. - Value *vectorizeTree(ArrayRef VL); + /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry + /// \p E. + Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx); /// Create a new vector from a list of scalar values. Produces a sequence /// which exploits values reused across lanes, and arranges the inserts /// for ease of later optimization. - Value *createBuildVector(ArrayRef VL); + Value *createBuildVector(const TreeEntry *E); /// \returns the scalarization cost for this type. Scalarization in this /// context means the creation of vectors from a group of scalars. 
If \p @@ -2376,6 +2377,12 @@ class BoUpSLP { return IsSame(Scalars, ReuseShuffleIndices); } + bool isOperandGatherNode(const EdgeInfo &UserEI) const { + return State == TreeEntry::NeedToGather && + UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx && + UserTreeIndices.front().UserTE == UserEI.UserTE; + } + /// \returns true if current entry has same operands as \p TE. bool hasEqualOperands(const TreeEntry &TE) const { if (TE.getNumOperands() != getNumOperands()) @@ -3910,17 +3917,22 @@ static bool isRepeatedNonIdentityClusteredMask(ArrayRef Mask, } void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { - // For vectorized and non-clustered reused - just reorder reuses mask. + // Reorder reuses mask. + reorderReuses(TE.ReuseShuffleIndices, Mask); const unsigned Sz = TE.Scalars.size(); - if (TE.State != TreeEntry::NeedToGather || !TE.ReorderIndices.empty() || + // For vectorized and non-clustered reused no need to do anything else. + if (TE.State != TreeEntry::NeedToGather || !ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices, Sz) || - !isRepeatedNonIdentityClusteredMask(TE.ReuseShuffleIndices, Sz)) { - reorderReuses(TE.ReuseShuffleIndices, Mask); + !isRepeatedNonIdentityClusteredMask(TE.ReuseShuffleIndices, Sz)) return; - } + SmallVector NewMask; + inversePermutation(TE.ReorderIndices, NewMask); + addMask(NewMask, TE.ReuseShuffleIndices); + // Clear reorder since it is going to be applied to the new mask. + TE.ReorderIndices.clear(); // Try to improve gathered nodes with clustered reuses, if possible. - reorderScalars(TE.Scalars, makeArrayRef(TE.ReuseShuffleIndices).slice(0, Sz)); + reorderScalars(TE.Scalars, makeArrayRef(NewMask).slice(0, Sz)); // Fill the reuses mask with the identity submasks. 
for (auto *It = TE.ReuseShuffleIndices.begin(), *End = TE.ReuseShuffleIndices.end(); @@ -8036,7 +8048,8 @@ class ShuffleInstructionBuilder { }; } // namespace -Value *BoUpSLP::vectorizeTree(ArrayRef VL) { +Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) { + ArrayRef VL = E->getOperand(NodeIdx); const unsigned VF = VL.size(); InstructionsState S = getSameOpcode(VL, *TLI); // Special processing for GEPs bundle, which may include non-gep values. @@ -8047,123 +8060,177 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { S = getSameOpcode(*It, *TLI); } if (S.getOpcode()) { - if (TreeEntry *E = getTreeEntry(S.OpValue)) - if (E->isSame(VL)) { - Value *V = vectorizeTree(E); - if (VF != cast(V->getType())->getNumElements()) { - if (!E->ReuseShuffleIndices.empty()) { - // Reshuffle to get only unique values. - // If some of the scalars are duplicated in the vectorization tree - // entry, we do not vectorize them but instead generate a mask for - // the reuses. But if there are several users of the same entry, - // they may have different vectorization factors. This is especially - // important for PHI nodes. In this case, we need to adapt the - // resulting instruction for the user vectorization factor and have - // to reshuffle it again to take only unique elements of the vector. - // Without this code the function incorrectly returns reduced vector - // instruction with the same elements, not with the unique ones. - - // block: - // %phi = phi <2 x > { .., %entry} {%shuffle, %block} - // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0> - // ... 
(use %2) - // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0} - // br %block - SmallVector UniqueIdxs(VF, UndefMaskElem); - SmallSet UsedIdxs; - int Pos = 0; - int Sz = VL.size(); - for (int Idx : E->ReuseShuffleIndices) { - if (Idx != Sz && Idx != UndefMaskElem && - UsedIdxs.insert(Idx).second) - UniqueIdxs[Idx] = Pos; - ++Pos; - } - assert(VF >= UsedIdxs.size() && "Expected vectorization factor " - "less than original vector size."); - UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem); - V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); - } else { - assert(VF < cast(V->getType())->getNumElements() && - "Expected vectorization factor less " - "than original vector size."); - SmallVector UniformMask(VF, 0); - std::iota(UniformMask.begin(), UniformMask.end(), 0); - V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); - } - if (auto *I = dyn_cast(V)) { - GatherShuffleExtractSeq.insert(I); - CSEBlocks.insert(I->getParent()); + if (TreeEntry *VE = getTreeEntry(S.OpValue); VE && VE->isSame(VL)) { + assert((any_of(VE->UserTreeIndices, + [E, NodeIdx](const EdgeInfo &EI) { + return EI.EdgeIdx == NodeIdx && EI.UserTE == E; + }) || + any_of(VectorizableTree, + [E, NodeIdx, VE](const std::unique_ptr &TE) { + return TE->isOperandGatherNode({E, NodeIdx}) && + VE->isSame(TE->Scalars); + })) && + "Expected same vectorizable node."); + Value *V = vectorizeTree(VE); + if (VF != cast(V->getType())->getNumElements()) { + if (!VE->ReuseShuffleIndices.empty()) { + // Reshuffle to get only unique values. + // If some of the scalars are duplicated in the vectorization + // tree entry, we do not vectorize them but instead generate a + // mask for the reuses. But if there are several users of the + // same entry, they may have different vectorization factors. + // This is especially important for PHI nodes. 
In this case, we + // need to adapt the resulting instruction for the user + // vectorization factor and have to reshuffle it again to take + // only unique elements of the vector. Without this code the + // function incorrectly returns reduced vector instruction with + // the same elements, not with the unique ones. + + // block: + // %phi = phi <2 x > { .., %entry} {%shuffle, %block} + // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0> + // ... (use %2) + // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0} + // br %block + SmallVector UniqueIdxs(VF, UndefMaskElem); + SmallSet UsedIdxs; + int Pos = 0; + for (int Idx : VE->ReuseShuffleIndices) { + if (Idx != static_cast(VF) && Idx != UndefMaskElem && + UsedIdxs.insert(Idx).second) + UniqueIdxs[Idx] = Pos; + ++Pos; } + assert(VF >= UsedIdxs.size() && "Expected vectorization factor " + "less than original vector size."); + UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem); + V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); + } else { + assert(VF < cast(V->getType())->getNumElements() && + "Expected vectorization factor less " + "than original vector size."); + SmallVector UniformMask(VF, 0); + std::iota(UniformMask.begin(), UniformMask.end(), 0); + V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); + } + if (auto *I = dyn_cast(V)) { + GatherShuffleExtractSeq.insert(I); + CSEBlocks.insert(I->getParent()); } - return V; } + return V; + } } - // Can't vectorize this, so simply build a new vector with each lane - // corresponding to the requested value. - return createBuildVector(VL); + // Find the corresponding gather entry and vectorize it. + // Allows to be more accurate with tree/graph transformations, checks for the + // correctness of the transformations in many cases. 
+ auto *I = find_if(VectorizableTree, + [E, NodeIdx](const std::unique_ptr &TE) { + return TE->isOperandGatherNode({E, NodeIdx}); + }); + assert(I != VectorizableTree.end() && "Gather node is not in the graph."); + assert(I->get()->UserTreeIndices.size() == 1 && + "Expected only single user for the gather node."); + assert(I->get()->isSame(VL) && "Expected same list of scalars."); + return vectorizeTree(I->get()); } -Value *BoUpSLP::createBuildVector(ArrayRef VL) { - assert(any_of(VectorizableTree, - [VL](const std::unique_ptr &TE) { - return TE->State == TreeEntry::NeedToGather && TE->isSame(VL); - }) && - "Non-matching gather node."); - unsigned VF = VL.size(); - // Exploit possible reuse of values across lanes. - SmallVector ReuseShuffleIndicies; - SmallVector UniqueValues; - if (VL.size() > 2) { + +Value *BoUpSLP::createBuildVector(const TreeEntry *E) { + assert(E->State == TreeEntry::NeedToGather && "Expected gather node."); + unsigned VF = E->getVectorFactor(); + + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, + CSEBlocks); + SmallVector Gathered( + VF, PoisonValue::get(E->Scalars.front()->getType())); + bool NeedFreeze = false; + SmallVector VL(E->Scalars.begin(), E->Scalars.end()); + // Build a mask out of the reorder indices and reorder scalars per this mask. + SmallVector ReorderMask; + inversePermutation(E->ReorderIndices, ReorderMask); + if (!ReorderMask.empty()) + reorderScalars(VL, ReorderMask); + if (!allConstant(VL)) { + // For splats we can emit broadcasts instead of gathers, so try to find + // such sequences.
+ bool IsSplat = isSplat(VL) && (VL.size() > 2 || VL.front() == VL.back()); + SmallVector ReuseMask(VF, UndefMaskElem); + SmallVector UndefPos; DenseMap UniquePositions; - unsigned NumValues = - std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { - return !isa(V); - }).base()); - VF = std::max(VF, PowerOf2Ceil(NumValues)); - int UniqueVals = 0; - for (Value *V : VL.drop_back(VL.size() - VF)) { + // Gather unique non-const values and all constant values. + // For repeated values, just shuffle them. + for (auto [I, V] : enumerate(VL)) { if (isa(V)) { - ReuseShuffleIndicies.emplace_back(UndefMaskElem); + if (!isa(V)) { + Gathered[I] = V; + ReuseMask[I] = I; + UndefPos.push_back(I); + } continue; } if (isConstant(V)) { - ReuseShuffleIndicies.emplace_back(UniqueValues.size()); - UniqueValues.emplace_back(V); + Gathered[I] = V; + ReuseMask[I] = I; continue; } - auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); - ReuseShuffleIndicies.emplace_back(Res.first->second); - if (Res.second) { - UniqueValues.emplace_back(V); - ++UniqueVals; - } - } - if (UniqueVals == 1 && UniqueValues.size() == 1) { - // Emit pure splat vector. - ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), - UndefMaskElem); - } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { - if (UniqueValues.empty()) { - assert(all_of(VL, UndefValue::classof) && "Expected list of undefs."); - NumValues = VF; + if (IsSplat) { + Gathered.front() = V; + ReuseMask[I] = 0; + } else { + const auto Res = UniquePositions.try_emplace(V, I); + Gathered[Res.first->second] = V; + ReuseMask[I] = Res.first->second; + } + } + if (!UndefPos.empty() && IsSplat) { + // For undef values, try to replace them with the simple broadcast. + // We can do it if the broadcasted value is guaranteed to be + // non-poisonous, or by freezing the incoming scalar value first. 
+ auto *It = find_if(Gathered, [this, E](Value *V) { + return !isa(V) && + (getTreeEntry(V) || isGuaranteedNotToBePoison(V) || + any_of(V->uses(), [E](const Use &U) { + // Check if the value already used in the same operation in + // one of the nodes already. + return E->UserTreeIndices.size() == 1 && + is_contained( + E->UserTreeIndices.front().UserTE->Scalars, + U.getUser()) && + E->UserTreeIndices.front().EdgeIdx != U.getOperandNo(); + })); + }); + if (It != Gathered.end()) { + // Replace undefs by the non-poisoned scalars and emit broadcast. + int Pos = std::distance(Gathered.begin(), It); + for_each(UndefPos, [&](int I) { + // Set the undef position to the non-poisoned scalar. + ReuseMask[I] = Pos; + // Replace the undef by the poison, in the mask it is replaced by non-poisoned scalar already. + if (I != Pos) + Gathered[I] = PoisonValue::get(Gathered[I]->getType()); + }); + } else { + // Replace undefs by the poisons, emit broadcast and then emit + // freeze. + for_each(UndefPos, [&](int I) { + ReuseMask[I] = UndefMaskElem; + if (isa(Gathered[I])) + Gathered[I] = PoisonValue::get(Gathered[I]->getType()); + }); + NeedFreeze = true; } - ReuseShuffleIndicies.clear(); - UniqueValues.clear(); - UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); } - UniqueValues.append(VF - UniqueValues.size(), - PoisonValue::get(VL[0]->getType())); - VL = UniqueValues; - } - - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, - CSEBlocks); - Value *Vec = gather(VL); - if (!ReuseShuffleIndicies.empty()) { - ShuffleBuilder.addMask(ReuseShuffleIndicies); - Vec = ShuffleBuilder.finalize(Vec); - } + ShuffleBuilder.addMask(ReuseMask); + } else { + copy(VL, Gathered.begin()); + } + // Gather unique scalars and all constants. 
+ Value *Vec = gather(Gathered); + ShuffleBuilder.addMask(E->ReuseShuffleIndices); + Vec = ShuffleBuilder.finalize(Vec); + if (NeedFreeze) + Vec = Builder.CreateFreeze(Vec); return Vec; } @@ -8180,6 +8247,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, CSEBlocks); if (E->State == TreeEntry::NeedToGather) { + if (E->Idx > 0) { + // We are in the middle of a vectorizable chain. We need to gather the + // scalars from the users. + Value *Vec = createBuildVector(E); + E->VectorizedValue = Vec; + return Vec; + } if (E->getMainOp()) setInsertPointAfterBundle(E); Value *Vec; @@ -8256,7 +8330,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Builder.SetInsertPoint(IBB->getTerminator()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); - Value *Vec = vectorizeTree(E->getOperand(i)); + Value *Vec = vectorizeOperand(E, i); NewPhi->addIncoming(Vec, IBB); } @@ -8290,7 +8364,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::InsertElement: { assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique"); Builder.SetInsertPoint(cast(E->Scalars.back())); - Value *V = vectorizeTree(E->getOperand(1)); + Value *V = vectorizeOperand(E, 1); // Create InsertVector shuffle if necessary auto *FirstInsert = cast(*find_if(E->Scalars, [E](Value *V) { @@ -8396,7 +8470,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::BitCast: { setInsertPointAfterBundle(E); - Value *InVec = vectorizeTree(E->getOperand(0)); + Value *InVec = vectorizeOperand(E, 0); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8417,8 +8491,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::ICmp: { setInsertPointAfterBundle(E); - Value *L = vectorizeTree(E->getOperand(0)); - Value *R = vectorizeTree(E->getOperand(1)); + Value *L = vectorizeOperand(E, 0); + Value *R = vectorizeOperand(E, 1); if (E->VectorizedValue) { 
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8439,9 +8513,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Select: { setInsertPointAfterBundle(E); - Value *Cond = vectorizeTree(E->getOperand(0)); - Value *True = vectorizeTree(E->getOperand(1)); - Value *False = vectorizeTree(E->getOperand(2)); + Value *Cond = vectorizeOperand(E, 0); + Value *True = vectorizeOperand(E, 1); + Value *False = vectorizeOperand(E, 2); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8460,7 +8534,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::FNeg: { setInsertPointAfterBundle(E); - Value *Op = vectorizeTree(E->getOperand(0)); + Value *Op = vectorizeOperand(E, 0); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8502,8 +8576,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Xor: { setInsertPointAfterBundle(E); - Value *LHS = vectorizeTree(E->getOperand(0)); - Value *RHS = vectorizeTree(E->getOperand(1)); + Value *LHS = vectorizeOperand(E, 0); + Value *RHS = vectorizeOperand(E, 1); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8550,7 +8624,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } } else { assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state"); - Value *VecPtr = vectorizeTree(E->getOperand(0)); + Value *VecPtr = vectorizeOperand(E, 0); // Use the minimum alignment of the gathered loads. 
Align CommonAlignment = LI->getAlign(); for (Value *V : E->Scalars) @@ -8573,7 +8647,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E); - Value *VecValue = vectorizeTree(E->getOperand(0)); + Value *VecValue = vectorizeOperand(E, 0); ShuffleBuilder.addMask(E->ReorderIndices); VecValue = ShuffleBuilder.finalize(VecValue); @@ -8604,11 +8678,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { auto *GEP0 = cast(VL0); setInsertPointAfterBundle(E); - Value *Op0 = vectorizeTree(E->getOperand(0)); + Value *Op0 = vectorizeOperand(E, 0); SmallVector OpVecs; for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) { - Value *OpVec = vectorizeTree(E->getOperand(J)); + Value *OpVec = vectorizeOperand(E, J); OpVecs.push_back(OpVec); } @@ -8662,7 +8736,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { continue; } - Value *OpVec = vectorizeTree(E->getOperand(j)); + Value *OpVec = vectorizeOperand(E, j); LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j)) @@ -8717,11 +8791,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = nullptr, *RHS = nullptr; if (Instruction::isBinaryOp(E->getOpcode()) || isa(VL0)) { setInsertPointAfterBundle(E); - LHS = vectorizeTree(E->getOperand(0)); - RHS = vectorizeTree(E->getOperand(1)); + LHS = vectorizeOperand(E, 0); + RHS = vectorizeOperand(E, 1); } else { setInsertPointAfterBundle(E); - LHS = vectorizeTree(E->getOperand(0)); + LHS = vectorizeOperand(E, 0); } if (E->VectorizedValue) { diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll index 967a4dca29ce5..9889d8d9e444a 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll @@ -25,42 +25,42 @@ define void @wrap_mul4(double* nocapture %Out, [2 x double]* nocapture readonly ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* 
[[ARRAYIDX3_I]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TEMP]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TEMP]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>* -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[TEMP2]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>* +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>* +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>* ; CHECK-NEXT: [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 2 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>* -; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[TMP13]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x 
double> [[TMP4]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[ARRAYIDX30_I]] to <2 x double>* -; CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[TMP16]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP9]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[TMP15]], [[TMP18]] -; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP19]], <2 x double>* [[TMP20]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>* +; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[ARRAYIDX30_I]] to <2 x double>* +; CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[TMP14]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP13]], [[TMP16]] +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[TMP18]], align 8 ; CHECK-NEXT: [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 4 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[TMP21]], double [[TEMP10]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[TMP2]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x double> [[TMP24]], double [[TEMP11]], i32 1 -; CHECK-NEXT: [[TMP26:%.*]] = fmul <2 x double> [[TMP7]], 
[[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[TMP23]], [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[TMP28]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0 +; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE4]] +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0 +; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE5]] +; CHECK-NEXT: [[TMP23:%.*]] = fadd <2 x double> [[TMP20]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP23]], <2 x double>* [[TMP24]], align 8 ; CHECK-NEXT: [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 6 -; CHECK-NEXT: [[TMP29:%.*]] = fmul <2 x double> [[TMP14]], [[TMP22]] -; CHECK-NEXT: [[TMP30:%.*]] = fmul <2 x double> [[TMP17]], [[TMP25]] -; CHECK-NEXT: [[TMP31:%.*]] = fadd <2 x double> [[TMP29]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = bitcast double* [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[TMP32]], align 8 +; CHECK-NEXT: [[TMP25:%.*]] = fmul <2 x double> [[TMP12]], [[SHUFFLE4]] +; CHECK-NEXT: [[TMP26:%.*]] = fmul <2 x double> [[TMP15]], [[SHUFFLE5]] +; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[TMP28]], align 8 ; CHECK-NEXT: ret void ; 
%arrayidx1.i = getelementptr inbounds [2 x double], [2 x double]* %A, i64 0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll index 9a0b68f0dc640..3327cb0e51a8e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll @@ -10,18 +10,18 @@ define void @slp_not_profitable_with_fast_fmf(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[SHUFFLE1]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], 
i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -50,18 +50,18 @@ define void @slp_not_profitable_with_reassoc_fmf(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul reassoc <2 x float> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub reassoc <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd reassoc <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = 
shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE1]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul reassoc <2 x float> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = fsub reassoc <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd reassoc <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -91,18 +91,18 @@ define void @slp_profitable_missing_fmf_on_fadd_fsub(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]] 
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[SHUFFLE1]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -132,18 +132,18 @@ define void @slp_profitable_missing_fmf_on_fmul_fadd_fsub(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, 
float [[A_0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE1]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -173,18 +173,18 @@ define void @slp_profitable_missing_fmf_nnans_only(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: 
[[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul nnan <2 x float> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan <2 x float> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fsub nnan <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd nnan <2 x float> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan <2 x float> [[SHUFFLE1]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan <2 x float> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP6:%.*]] = fsub nnan <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd nnan <2 x float> [[TMP5]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr 
%B, i64 1 @@ -267,16 +267,16 @@ define void @slp_profitable(ptr %A, ptr %B, float %0) { ; CHECK-NEXT: [[SUB_I1096:%.*]] = fsub fast float 1.000000e+00, [[TMP0:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[SUB_I1096]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[SUB_I1096]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[SHUFFLE]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x float> [[SHUFFLE]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[SUB_I1096]], i32 0 +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[SHUFFLE1]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <2 x float> [[SHUFFLE1]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> +; CHECK-NEXT: store <2 x float> 
[[TMP8]], ptr [[B:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll index 9f2bf46d6f0d0..d9f33c2b5b6f8 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll @@ -16,14 +16,14 @@ define void @splat_loads_double(double *%array1, double *%array2, double *%ptrA, ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x 
double>* [[TMP7]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -63,14 +63,14 @@ define void @splat_loads_float(float *%array1, float *%array2, float *%ptrA, flo ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[LD_2_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[LD_2_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[LD_2_1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[LD_2_1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[LD_2_1]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP6]], <2 x float>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -110,14 +110,14 @@ define void @splat_loads_i64(i64 *%array1, i64 *%array2, i64 *%ptrA, i64 *%ptrB) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = 
insertelement <2 x i64> poison, i64 [[LD_2_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[LD_2_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[LD_2_1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_1]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -157,14 +157,14 @@ define void @splat_loads_i32(i32 *%array1, i32 *%array2, i32 *%ptrA, i32 *%ptrB) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[LD_2_0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[LD_2_1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> 
[[TMP1]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_1]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll index 64c1af4c3035d..b96862f0d0b21 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -19,23 +19,22 @@ define void @s116_modified(float* %a) { ; CHECK-LABEL: @s116_modified( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2 -; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 4 -; CHECK-NEXT: [[LD1:%.*]] = load float, float* [[GEP1]], align 4 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3 ; CHECK-NEXT: [[LD0:%.*]] = load float, float* [[GEP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP2]] to <2 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x 
float>* [[TMP1]], align 4 -; CHECK-NEXT: [[LD4:%.*]] = load float, float* [[GEP4]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[LD4]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[LD1]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[LD1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[GEP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x float> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP12]], align 4 ; 
CHECK-NEXT: ret void ; %gep0 = getelementptr inbounds float, float* %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index f1241370626fb..7eedbe98d4fa9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -518,10 +518,10 @@ define void @select_uniform_eq_2xi32(i32* %ptr, i32 %x) { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[X]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[PTR]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[PTR]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -585,10 +585,10 @@ define void @select_uniform_ne_2xi64(i64* %ptr, i64 %x) { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[X]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* -; CHECK-NEXT: 
store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 2 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]], align 2 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll index 01ba01ecc2bcb..2b792da557188 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -60,11 +60,11 @@ define void @extracts_first_2_lanes_different_vectors(<2 x double>* %ptr.1, <4 x ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_0]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V3_LANE_1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V3_LANE_1]]) -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -102,12 +102,12 @@ define void @noop_extract_second_2_lanes(<4 x double>* %ptr.1, <4 x double>* %pt ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double 
[[V1_LANE_2]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -139,14 +139,14 @@ define void @extract_reverse_order(<2 x double>* %ptr.1, <4 x double>* %ptr.2) { ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[V_1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, 
<2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 +; CHECK-NEXT: call void @use(double [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1 ; CHECK-NEXT: call void @use(double [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[V_1]], i32 1 -; CHECK-NEXT: call void @use(double [[TMP5]]) -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -182,12 +182,12 @@ define void @extract_lanes_1_and_2(<4 x double>* %ptr.1, <4 x double>* %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_1]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_2]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) -; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -288,15 +288,15 @@ define void @extracts_jumbled_4_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_3]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = 
insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_2]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[V2_LANE_0]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP3]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_0]], i32 3 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP9]], <9 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP8]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll index 7ab2df33692ef..d4ada814be303 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll @@ -115,10 +115,10 @@ define amdgpu_kernel void @mul_scalar_v2f16(half addrspace(3)* %a, half %scalar, ; GCN-NEXT: [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)* ; GCN-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2 ; GCN-NEXT: [[TMP3:%.*]] = insertelement <2 x half> poison, half [[SCALAR:%.*]], i32 0 -; GCN-NEXT: [[TMP4:%.*]] = insertelement <2 x half> [[TMP3]], half 
[[SCALAR]], i32 1 -; GCN-NEXT: [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]] -; GCN-NEXT: [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)* -; GCN-NEXT: store <2 x half> [[TMP5]], <2 x half> addrspace(3)* [[TMP6]], align 2 +; GCN-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x half> [[TMP3]], <2 x half> poison, <2 x i32> zeroinitializer +; GCN-NEXT: [[TMP4:%.*]] = fmul <2 x half> [[TMP2]], [[SHUFFLE]] +; GCN-NEXT: [[TMP5:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)* +; GCN-NEXT: store <2 x half> [[TMP4]], <2 x half> addrspace(3)* [[TMP5]], align 2 ; GCN-NEXT: ret void ; %i0 = load half, half addrspace(3)* %a, align 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll index 6c2cd2aa43c4a..d6e016e62266e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll @@ -9,17 +9,17 @@ define { i64, i64 } @patatino(double %arg) { ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[ARG:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> 
[[TMP8]] to <2 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 -; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 -; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP0]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = fptosi <2 x double> [[TMP6]] to <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i32> [[TMP7]] to <2 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1 +; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP10]], 1 ; CHECK-NEXT: ret { i64, i64 } [[T17]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index cb50607383cd7..2f0bcfc4b5d85 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -6,17 +6,17 @@ define void @Test(i32) { ; CHECK-LABEL: @Test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: 
[[TMP2:%.*]] = insertelement <16 x i32> poison, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE6:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFFLE7:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP14:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[SHUFFLE6]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[SHUFFLE7]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[SHUFFLE7]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[SHUFFLE8]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP8]] @@ -25,10 +25,10 @@ define void @Test(i32) { ; CHECK-NEXT: [[OP_RDX4:%.*]] = and i32 [[OP_RDX2]], [[OP_RDX3]] ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> , i32 [[OP_RDX4]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i32> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP14]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> +; CHECK-NEXT: [[SHUFFLE6:%.*]] = shufflevector 
<2 x i32> [[TMP10]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP9]], [[SHUFFLE6]] +; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP9]], [[SHUFFLE6]] +; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll index c7a8392defc14..6fda6be46eec2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll @@ -5,9 +5,9 @@ define i16 @test(i16 %call37) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = load i16, i16* undef, align 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL37:%.*]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL37:%.*]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[SHUFFLE]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <8 x i16> [[SHUFFLE]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll index 4eeb422caabaa..e3e6910b931ee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll @@ -19,8 +19,9 @@ define void @bcast_long(i32 *%A, i32 *%S) { ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0 ; CHECK-NEXT: 
[[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = freeze <8 x i32> [[SHUFFLE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[TMP1]], <8 x i32>* [[TMP2]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll index 0f15a06eb5ab7..79a4054b63a2b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -46,11 +46,11 @@ define void @test(float %a) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float [[A]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> zeroinitializer, [[SHUFFLE]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll index 7e241144afc53..6b104f33e0c6a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll @@ -71,19 +71,19 @@ define i32 @ray_sphere(ptr 
nocapture noundef readonly %sph, ptr nocapture nounde ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x double> [[TMP28]], double [[TMP12]], i32 1 ; CHECK-NEXT: [[TMP30:%.*]] = fsub <2 x double> [[TMP27]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x double> poison, double [[MUL88]], i32 0 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[TMP31]], double [[MUL88]], i32 1 -; CHECK-NEXT: [[TMP33:%.*]] = fdiv <2 x double> [[TMP30]], [[TMP32]] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP33]], i32 1 -; CHECK-NEXT: [[CMP93:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i32 0 -; CHECK-NEXT: [[CMP94:%.*]] = fcmp olt double [[TMP35]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = fdiv <2 x double> [[TMP30]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP32]], i32 1 +; CHECK-NEXT: [[CMP93:%.*]] = fcmp olt double [[TMP33]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP32]], i32 0 +; CHECK-NEXT: [[CMP94:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP93]], i1 [[CMP94]], i1 false ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 -; CHECK-NEXT: [[OR_COND106:%.*]] = select i1 [[TMP38]], i1 true, i1 [[TMP37]] +; CHECK-NEXT: [[TMP35:%.*]] = fcmp ule <2 x double> [[TMP32]], +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP35]], i32 0 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP35]], i32 1 +; CHECK-NEXT: [[OR_COND106:%.*]] = select i1 
[[TMP37]], i1 true, i1 [[TMP36]] ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[OR_COND106]] to i32 ; CHECK-NEXT: br label [[CLEANUP]] ; CHECK: cleanup: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll index bdbcc5cb51b8f..600cf2539ced4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll @@ -14,10 +14,10 @@ define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, doub ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[G]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[TMP4]], <2 x double> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll index 67ca7282b002d..abe08f7c693bb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -97,9 +97,9 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) { ; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 
[[B:%.*]], i32 1 ; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3 -; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]] -; AVX-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 +; AVX-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; AVX-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[SHUFFLE2]] +; AVX-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; AVX-NEXT: ret void ; %add1 = add i32 %c, %a diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index c1434c78129ab..10e1d9123a7f6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -11,20 +11,20 @@ define void @reduce_compare(double* nocapture %A, i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> 
[[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 9e981230e5862..d9655b7444005 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -12,32 +12,32 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP0]], [[TMP3]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <2 x float> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP6]], <2 x float> -; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt 
<2 x float> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x float> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP9]], <2 x float> , <2 x float> [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 -; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> -; CHECK-NEXT: [[TMP18:%.*]] = fcmp olt <2 x float> [[TMP17]], -; CHECK-NEXT: [[TMP19]] = select <2 x i1> [[TMP18]], <2 x float> , <2 x float> [[TMP17]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[SHUFFLE]] +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x float> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP5]], <2 x float> +; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x float> , <2 x float> [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP10]], i32 1 +; CHECK-NEXT: [[ADD13]] = fadd float [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 
x i32> +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[ADD13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP14]], +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x float> [[TMP14]], <2 x float> +; CHECK-NEXT: [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP16]], +; CHECK-NEXT: [[TMP18]] = select <2 x i1> [[TMP17]], <2 x float> , <2 x float> [[TMP16]] ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 7e92b130d307b..65817d74ee54f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -5,19 +5,19 @@ define void @exceed(double %0, double %1) { ; CHECK-LABEL: @exceed( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: 
[[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP5]], undef ; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP5]], undef ; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef @@ -27,16 +27,16 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP6]], [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP9]], [[TMP7]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; CHECK-NEXT: 
[[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[TMP12]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] ; CHECK-NEXT: ] @@ -45,7 +45,7 @@ define void @exceed(double %0, double %1) { ; CHECK: bb2: ; CHECK-NEXT: br label [[LABEL]] ; CHECK: label: -; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP15]], [[BB2]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x double> [ [[TMP10]], [[BB1]] ], [ [[TMP13]], [[BB2]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll index c8d073fa243b5..3e7896b5b4f4e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -246,22 +246,22 @@ define i32 @partial_mrg(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[A]] to <2 x 
double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], 4 ; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[N]], 4 ; CHECK-NEXT: [[CONV12:%.*]] = sitofp i32 [[ADD]] to double -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP8]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret i32 0 @@ -352,18 +352,18 @@ define void @cse_for_hoisted_instructions_in_preheader(i32* %dst, i32 %a, i1 %c) ; CHECK-LABEL: @cse_for_hoisted_instructions_in_preheader( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> , [[TMP1]] 
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> , [[SHUFFLE]] ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 10 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll index da4c2424066c5..c3b98765d6e45 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -6,11 +6,10 @@ define i64 @foo(i32 %tmp7) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[TMP7:%.*]], i32 2 ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> , [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = 
add nsw <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 6 +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> , [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> , [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll index ec36710fdb3a5..0c4ff04209f04 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll @@ -11,12 +11,12 @@ define i32 @fn1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP6]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP5]], align 8 
; CHECK-NEXT: ret i32 undef ; entry: @@ -94,15 +94,15 @@ define void @externally_used_ptrs() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll index fc1ab867faa54..33959dc48f296 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll @@ -16,11 +16,11 @@ define 
float @multi_uses(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @multi_uses( ; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Y1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] ; CHECK-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll index 2d8707ea68c2d..e24d3a6c6776e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -84,21 +84,21 @@ define float @f_used_twice_in_tree(<2 x float> %x) { ; THRESH1-LABEL: @f_used_twice_in_tree( ; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 ; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; 
THRESH1-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; THRESH1-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] +; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( ; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 ; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 -; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] +; THRESH2-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; THRESH2-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] +; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index b15d3f70b2317..cd113e89ada0e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -769,11 +769,11 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP1]]) ; 
THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[CONV]], i32 0 -; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[CONV]], i32 1 -; THRESHOLD-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[TMP3]], [[TMP5]] -; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP7]], [[TMP8]] +; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer +; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast <2 x float> [[TMP3]], [[SHUFFLE]] +; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 +; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP6]], [[TMP7]] ; THRESHOLD-NEXT: ret float [[OP_RDX2]] ; entry: @@ -897,11 +897,11 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[CONVC]], i32 1 ; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[CONV]], i32 0 -; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP4]], [[TMP6]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 -; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]] +; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> zeroinitializer +; THRESHOLD-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[TMP4]], [[SHUFFLE]] 
+; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 +; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP7]], [[TMP8]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], 3.000000e+00 ; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll index e703928a8077a..36c1a8d4fc071 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll @@ -12,21 +12,21 @@ define void @in_tree_user(double* nocapture %A, i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; CHECK-NEXT: [[CMP11:%.*]] = 
fcmp ogt double [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP7]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]] ; CHECK-NEXT: br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll index 3cc92d2030e66..d301ff8734cc0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -12,10 +12,9 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], 
[[SHUFFLE1]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], undef ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], undef ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index 225155d93cb58..b626a2f84cd3d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,10 +9,11 @@ define i32 @fn1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SHUFFLE]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index 
e25361f7737d5..168ffb62cdcc0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -444,14 +444,14 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl ; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4 ; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[LOADA0]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[LOADA1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[LOADVEC2]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[LOADVEC]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[LOADVEC2]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 ; CHECK-NEXT: ret void ; %idx0 = getelementptr inbounds double, double* %array, i64 0 @@ -669,16 +669,16 @@ define void 
@ChecksExtractScores_different_vectors(double* %storeArray, double* ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1 ; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[LOADA0]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 -; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1 -; AVX-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; AVX-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[LOADA1]], i32 1 -; AVX-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP7]], [[TMP9]] -; AVX-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* -; AVX-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE]] +; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[EXTRB1]], i32 1 +; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]] +; AVX-NEXT: [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* +; AVX-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 ; AVX-NEXT: ret void ; %idx0 = 
getelementptr inbounds double, double* %array, i64 0 @@ -739,15 +739,15 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub ; AVX-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1 -; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1 -; AVX-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]] -; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 -; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 -; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP9]], [[TMP10]] +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] +; AVX-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] ; AVX-NEXT: ret double [[ADD3]] ; entry: @@ -789,11 +789,11 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2, ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] ; 
SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] ; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] -; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 -; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] +; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE1]] +; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( @@ -806,16 +806,16 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2, ; AVX-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1 -; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] -; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1 -; AVX-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]] -; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = fsub <2 x double> [[TMP8]], [[TMP3]] -; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 -; AVX-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 -; AVX-NEXT: 
[[RES:%.*]] = fadd double [[TMP10]], [[TMP11]] +; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 +; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer +; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] +; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] +; AVX-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE]] +; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] ; AVX-NEXT: ret double [[RES]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index e6cad5c9a88c6..118372d2d5898 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -10,18 +10,18 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef ; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_1]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: 
[[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> , i32 [[SUB102_1]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 5 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 6 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> , i32 [[SUB86_1]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 12 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll index 3b75850b87082..604b833197893 
100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll @@ -17,21 +17,21 @@ define void @f(i1 %x) #0 { ; CHECK-NEXT: [[ICMP_A1:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @b to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i1> poison, i1 [[ICMP_A1]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i1> [[TMP3]], i1 [[ICMP_A1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i64> [[TMP2]], <2 x i64> [[TMP0]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[SHUFFLE]], <2 x i64> [[TMP2]], <2 x i64> [[TMP0]] ; CHECK-NEXT: br label [[WHILE_END]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP5]], [[WHILE_BODY_LR_PH]] ] -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @c to <2 x i64>*), align 8 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 -; CHECK-NEXT: [[ICMP_D0:%.*]] = icmp eq i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_BODY_LR_PH]] ] +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @c to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[ICMP_D0:%.*]] = icmp eq i64 [[TMP7]], 0 ; CHECK-NEXT: br i1 [[ICMP_D0]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]] -; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast 
(%struct.a* @a to <2 x i64>*), align 8 +; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP7]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP5]], <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i64> [[TMP9]], [[TMP6]] +; CHECK-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll index b7fb6c0176127..a7b494e007cc4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -17,23 +17,24 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], 
i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]] +; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i32> [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP4]], <4 x i32> undef +; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i32> [[TMP6]] to <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]] ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll index 2a7e6d6697061..88f75c37846ef 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll @@ -14,7 +14,7 @@ define i32 @phi3UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -51,7 +51,7 @@ define i32 @phi2UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -88,7 +88,7 @@ define i32 @phi1UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -126,7 +126,7 @@ define i32 @phi1Undef1PoisonInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %ar ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; 
CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -164,7 +164,7 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -201,7 +201,7 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll index fdc8b10100572..fcea56b282fd6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -94,19 +94,19 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double 
[[B]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP7]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP9:%.*]] = fcmp ule <2 x double> [[TMP6]], +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 +; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP10]], [[TMP11]] ; CHECK-NEXT: ret i1 [[NOT_OR_COND9]] ; CHECK: cleanup: ; CHECK-NEXT: ret i1 false @@ -143,12 +143,12 @@ define i1 @fcmp_lt(double %a, double %b, double %c) { ; CHECK-NEXT: [[TMP4:%.*]] = 
insertelement <2 x double> [[TMP3]], double [[B]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[MUL]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP5]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP8:%.*]] = fcmp uge <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 +; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP9]], [[TMP10]] ; CHECK-NEXT: ret i1 [[NOT_OR_COND]] ; %fneg = fneg double %b diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index d25c77ca34841..873948c9596f5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,7 +9,7 @@ define void @fextr(i16* %ptr) { ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* ; CHECK-NEXT: 
store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll index 591537e4e37ea..c43753d995a77 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll @@ -9,8 +9,8 @@ define void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %R ; CHECK-NEXT: [[TMP0:%.*]] = add i64 256, 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP20:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX:%.*]], %struct.complex* [[A:%.*]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[B:%.*]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP4]], align 4 @@ -19,23 +19,23 @@ define void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %R ; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP3]] to <2 x float>* ; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x float> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x float> [[TMP9]], [[TMP14]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = 
shufflevector <2 x float> [[TMP15]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = fsub <2 x float> [[TMP12]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x float> [[TMP12]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> [[TMP17]], <2 x i32> -; CHECK-NEXT: [[TMP19]] = fadd <2 x float> [[TMP2]], [[TMP18]] -; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], [[TMP0]] -; CHECK-NEXT: br i1 [[TMP21]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE1]] +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP13]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x float> [[TMP11]], [[SHUFFLE2]] +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x i32> +; CHECK-NEXT: [[TMP17]] = fadd <2 x float> [[TMP2]], [[TMP16]] +; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP19]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[RESULT:%.*]], i32 0, i32 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[TMP23]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* 
[[RESULT:%.*]], i32 0, i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP17]], <2 x float>* [[TMP21]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll index aae0a078692f5..1b31f8a3a98d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll @@ -13,18 +13,18 @@ define void @rotate_with_external_users(double *%A, double *%ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], ; CHECK-NEXT: [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PTRA1]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRA1]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x 
double> [[TMP2]], +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP6]], [[TMP5]] ; CHECK-NEXT: ret void ; bb1: @@ -117,22 +117,22 @@ define void @addsub_and_external_users(double *%A, double *%ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], ; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP7]], align 8 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[SHUFFLE1]], <2 x double>* [[TMP6]], align 8 ; CHECK-NEXT: 
br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP6]], -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 -; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] ; CHECK-NEXT: ret void ; bb1: @@ -167,21 +167,21 @@ define void @subadd_and_external_users(double *%A, double *%ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], ; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8 +; CHECK-NEXT: 
[[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP6]], -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 -; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP9]], [[TMP8]] ; CHECK-NEXT: ret void ; bb1: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll index 8e26c28219788..ce6a477f30a08 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll @@ -6,12 +6,12 @@ define i32 @main(i32 %0) { ; CHECK-NEXT: for.cond.preheader: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]] ; CHECK: for.inc.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 ; CHECK-NEXT: br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[L1_PREHEADER]] ; CHECK: L1.preheader: ; CHECK-NEXT: [[TMP3:%.*]] = phi <8 x i32> [ 
[[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll index b7d6827e25123..7438f4e6a5cdc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -13,16 +13,16 @@ define void @test(i1 %c, ptr %arg) { ; CHECK: else: ; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[ARG]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> [[TMP9]], ptr [[ARG_1]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], i32 8, <4 x i1> , <4 x i64> poison) +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> , <4 x i64> poison) ; 
CHECK-NEXT: br label [[JOIN]] ; CHECK: join: -; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP11]], [[ELSE]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP10]], [[ELSE]] ] ; CHECK-NEXT: ret void ; br i1 %c, label %if, label %else diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index 28759385d5539..9f5b8ca0e8d1e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -8,7 +8,7 @@ define void @foo() { ; CHECK-NEXT: [[SUB:%.*]] = fsub float 6.553500e+04, undef ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[SUB]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[SUB]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: From 80378a4ca725eeeae940b99220b3913f7b73c895 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Mon, 7 Nov 2022 15:12:18 +0000 Subject: [PATCH 420/516] [NFC] Move getDebugValueLoc from static in Local.cpp to DebugInfo.h Move getDebugValueLoc so that it can be accessed from DebugInfo.h for the Assignment Tracking patch stack and remove redundant parameter Src. 
Reviewed By: jryans Differential Revision: https://reviews.llvm.org/D132357 --- llvm/include/llvm/IR/DebugInfo.h | 4 ++++ llvm/lib/IR/DebugInfo.cpp | 12 ++++++++++++ llvm/lib/Transforms/Utils/Local.cpp | 21 ++++----------------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 3987f5bbbd8eb..02f4da19c3463 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -49,6 +49,10 @@ void findDbgUsers(SmallVectorImpl &DbgInsts, Value *V); /// Find subprogram that is enclosing this scope. DISubprogram *getDISubprogram(const MDNode *Scope); +/// Produce a DebugLoc to use for each dbg.declare that is promoted to a +/// dbg.value. +DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII); + /// Strip debug info in the module if it exists. /// /// To do this, we remove all calls to the debugger intrinsics and any named diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index a051a1bbb604c..64d606ec15a60 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -140,6 +140,18 @@ DISubprogram *llvm::getDISubprogram(const MDNode *Scope) { return nullptr; } +DebugLoc llvm::getDebugValueLoc(DbgVariableIntrinsic *DII) { + // Original dbg.declare must have a location. + const DebugLoc &DeclareLoc = DII->getDebugLoc(); + MDNode *Scope = DeclareLoc.getScope(); + DILocation *InlinedAt = DeclareLoc.getInlinedAt(); + // Because no machine insts can come from debug intrinsics, only the scope + // and inlinedAt is significant. Zero line numbers are used in case this + // DebugLoc leaks into any adjacent instructions. Produce an unknown location + // with the correct scope / inlinedAt fields. + return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt); +} + //===----------------------------------------------------------------------===// // DebugInfoFinder implementations. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index e6b3b5cf159a1..e31e69130d671 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1495,19 +1495,6 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { return false; } -/// Produce a DebugLoc to use for each dbg.declare/inst pair that are promoted -/// to a dbg.value. Because no machine insts can come from debug intrinsics, -/// only the scope and inlinedAt is significant. Zero line numbers are used in -/// case this DebugLoc leaks into any adjacent instructions. -static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) { - // Original dbg.declare must have a location. - const DebugLoc &DeclareLoc = DII->getDebugLoc(); - MDNode *Scope = DeclareLoc.getScope(); - DILocation *InlinedAt = DeclareLoc.getInlinedAt(); - // Produce an unknown location with the correct scope / inlinedAt fields. - return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt); -} - /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, @@ -1518,7 +1505,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, auto *DIExpr = DII->getExpression(); Value *DV = SI->getValueOperand(); - DebugLoc NewLoc = getDebugValueLoc(DII, SI); + DebugLoc NewLoc = getDebugValueLoc(DII); if (!valueCoversEntireFragment(DV->getType(), DII)) { // FIXME: If storing to a part of the variable described by the dbg.declare, @@ -1553,7 +1540,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, return; } - DebugLoc NewLoc = getDebugValueLoc(DII, nullptr); + DebugLoc NewLoc = getDebugValueLoc(DII); // We are now tracking the loaded value instead of the address. 
In the // future if multi-location support is added to the IR, it might be @@ -1587,7 +1574,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, BasicBlock *BB = APN->getParent(); auto InsertionPt = BB->getFirstInsertionPt(); - DebugLoc NewLoc = getDebugValueLoc(DII, nullptr); + DebugLoc NewLoc = getDebugValueLoc(DII); // The block may be a catchswitch block, which does not have a valid // insertion point. @@ -1659,7 +1646,7 @@ bool llvm::LowerDbgDeclare(Function &F) { // pointer to the variable. Insert a *value* intrinsic that describes // the variable by dereferencing the alloca. if (!CI->isLifetimeStartOrEnd()) { - DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr); + DebugLoc NewLoc = getDebugValueLoc(DDI); auto *DerefExpr = DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, From b9b74fc6e98a6e9bd50a1882b839419c9a6577b2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 3 Nov 2022 18:27:47 -0700 Subject: [PATCH 421/516] InstCombine: Add baseline tests for fcmp and select on denormal range A future change will try to fold (if input denormals are treated as 0) fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0 fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0 fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0 --- .../InstCombine/fcmp-denormals-are-zero.ll | 383 ++++++++++++++++++ 1 file changed, 383 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll diff --git a/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll b/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll new file mode 100644 index 0000000000000..385bebcd21741 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll @@ -0,0 +1,383 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt
-S -instcombine %s | FileCheck %s + +@var = external global i32, align 4 + +; fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 +; https://alive2.llvm.org/ce/z/fib8cf +define void @denormal_input_preserve_sign_fcmp_olt_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_olt_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: [[F32_FABS_FLAGS:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[F32]]) +; CHECK-NEXT: [[CMPF32_FLAGS:%.*]] = fcmp olt float [[F32_FABS_FLAGS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32_FLAGS]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + + %f32.fabs.flags = call nsz nnan float @llvm.fabs.f32(float %f32) + %cmpf32.flags = fcmp olt float %f32.fabs.flags, 0x3810000000000000 + store volatile i1 %cmpf32.flags, ptr @var + + ret void +} + +; fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0 
+; https://alive2.llvm.org/ce/z/xmqBXx +define void @denormal_input_preserve_sign_fcmp_uge_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_uge_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp uge float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp uge double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp uge half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp uge float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp uge double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp uge half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0 +; https://alive2.llvm.org/ce/z/ZucNzF +define void @denormal_input_preserve_sign_fcmp_oge_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_oge_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oge float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oge 
double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oge half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp oge float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp oge double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp oge half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0 +; https://alive2.llvm.org/ce/z/csAhZ2 +define void @denormal_input_preserve_sign_fcmp_ult_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_ult_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ult float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ult double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ult half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp ult float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp ult double 
%f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp ult half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_olt_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_olt_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt <2 x float> [[F32_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt <2 x double> [[F64_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt <2 x half> [[F16_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp olt <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp olt <2 x double> %f64.fabs, + store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp olt <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_uge_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_uge_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp uge <2 x 
float> [[F32_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp uge <2 x double> [[F64_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp uge <2 x half> [[F16_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp uge <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp uge <2 x double> %f64.fabs, + store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp uge <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_oge_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_oge_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oge <2 x float> [[F32_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oge <2 x double> [[F64_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oge <2 x half> [[F16_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + 
%f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp oge <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp oge <2 x double> %f64.fabs, + store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp oge <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_ult_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_ult_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ult <2 x float> [[F32_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ult <2 x double> [[F64_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ult <2 x half> [[F16_FABS]], +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp ult <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp ult <2 x double> %f64.fabs, + store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp ult <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +; fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 +; 
https://alive2.llvm.org/ce/z/mpduXS +define void @denormal_input_positive_zero_fcmp_olt_smallest_normalized(float %f32, double %f64, half %f16) #1 { +; CHECK-LABEL: @denormal_input_positive_zero_fcmp_olt_smallest_normalized( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; Should not fold with IEEE inputs. 
+define void @denormal_input_ieee(float %f32, double %f64, half %f16) #2 { +; CHECK-LABEL: @denormal_input_ieee( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; Only f32 case should fold. 
+define void @denormal_input_preserve_sign_f32_only(float %f32, double %f64, half %f16) #3 { +; CHECK-LABEL: @denormal_input_preserve_sign_f32_only( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +define void @wrong_fcmp_type_ole(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @wrong_fcmp_type_ole( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ole float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ole double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ole half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: 
store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp ole float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp ole double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp ole half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +define void @missing_fabs(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @missing_fabs( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32:%.*]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64:%.*]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16:%.*]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %cmpf32 = fcmp olt float %f32, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %cmpf64 = fcmp olt double %f64, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %cmpf16 = fcmp olt half %f16, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +declare float @llvm.fabs.f32(float) +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) + +declare half @llvm.fabs.f16(half) +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) + +declare double @llvm.fabs.f64(double) +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) + +attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" } +attributes #1 = { "denormal-fp-math"="ieee,positive-zero" } +attributes #2 = { "denormal-fp-math"="ieee,iee" } +attributes #3 = { "denormal-fp-math-f32"="ieee,preserve-sign" } From 0f68ffe1e2b2d4b518aebe14709aff253658cfb2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 3 Nov 2022 18:26:59 -0700 Subject: [PATCH 
422/516] InstCombine: Fold compare with smallest normal if input denormals are flushed Try to simplify comparisons with the smallest normalized value. If denormals will be treated as 0, we can simplify by using an equality comparison with 0. fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0 fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0 fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0 The device libraries have a few range checks that look like this for denormal handling paths. --- .../InstCombine/InstCombineCompares.cpp | 42 ++++++++- .../InstCombine/fcmp-denormals-are-zero.ll | 87 +++++++------------ 2 files changed, 69 insertions(+), 60 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 93fc04c6d049f..64ec387b6afa1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6772,10 +6772,48 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI, /// Optimize fabs(X) compared with zero. 
static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) { Value *X; - if (!match(I.getOperand(0), m_FAbs(m_Value(X))) || - !match(I.getOperand(1), m_PosZeroFP())) + if (!match(I.getOperand(0), m_FAbs(m_Value(X)))) return nullptr; + const APFloat *C; + if (!match(I.getOperand(1), m_APFloat(C))) + return nullptr; + + if (!C->isPosZero()) { + if (*C != APFloat::getSmallestNormalized(C->getSemantics())) + return nullptr; + + const Function *F = I.getFunction(); + DenormalMode Mode = F->getDenormalMode(C->getSemantics()); + if (Mode.Input == DenormalMode::PreserveSign || + Mode.Input == DenormalMode::PositiveZero) { + + auto replaceFCmp = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) { + Constant *Zero = ConstantFP::getNullValue(X->getType()); + return new FCmpInst(P, X, Zero, "", I); + }; + + switch (I.getPredicate()) { + case FCmpInst::FCMP_OLT: + // fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_OEQ, X); + case FCmpInst::FCMP_UGE: + // fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_UNE, X); + case FCmpInst::FCMP_OGE: + // fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_ONE, X); + case FCmpInst::FCMP_ULT: + // fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_UEQ, X); + default: + break; + } + } + + return nullptr; + } + auto replacePredAndOp0 = [&IC](FCmpInst *I, FCmpInst::Predicate P, Value *X) { I->setPredicate(P); return IC.replaceOperand(*I, 0, X); diff --git a/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll b/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll index 385bebcd21741..ef9b6f7ce4396 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll @@ -7,17 +7,13 @@ ; https://alive2.llvm.org/ce/z/fib8cf define void 
@denormal_input_preserve_sign_fcmp_olt_smallest_normalized(float %f32, double %f64, half %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_olt_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq float [[F32:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oeq double [[F64:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oeq half [[F16:%.*]], 0xH0000 ; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 -; CHECK-NEXT: [[F32_FABS_FLAGS:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[F32]]) -; CHECK-NEXT: [[CMPF32_FLAGS:%.*]] = fcmp olt float [[F32_FABS_FLAGS]], 0x3810000000000000 +; CHECK-NEXT: [[CMPF32_FLAGS:%.*]] = fcmp oeq float [[F32]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF32_FLAGS]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -44,14 +40,11 @@ define void @denormal_input_preserve_sign_fcmp_olt_smallest_normalized(float %f3 ; https://alive2.llvm.org/ce/z/xmqBXx define void @denormal_input_preserve_sign_fcmp_uge_smallest_normalized(float %f32, double %f64, half %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_uge_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp uge float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp une float [[F32:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr 
@var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp uge double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp une double [[F64:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp uge half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp une half [[F16:%.*]], 0xH0000 ; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -73,14 +66,11 @@ define void @denormal_input_preserve_sign_fcmp_uge_smallest_normalized(float %f3 ; https://alive2.llvm.org/ce/z/ZucNzF define void @denormal_input_preserve_sign_fcmp_oge_smallest_normalized(float %f32, double %f64, half %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_oge_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oge float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp one float [[F32:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oge double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp one double [[F64:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oge half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp one half [[F16:%.*]], 0xH0000 ; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -102,14 +92,11 @@ define void @denormal_input_preserve_sign_fcmp_oge_smallest_normalized(float %f3 ; https://alive2.llvm.org/ce/z/csAhZ2 define void 
@denormal_input_preserve_sign_fcmp_ult_smallest_normalized(float %f32, double %f64, half %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_ult_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ult float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ueq float [[F32:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ult double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ueq double [[F64:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ult half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ueq half [[F16:%.*]], 0xH0000 ; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -129,14 +116,11 @@ define void @denormal_input_preserve_sign_fcmp_ult_smallest_normalized(float %f3 define void @denormal_input_preserve_sign_vector_fcmp_olt_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_olt_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt <2 x float> [[F32_FABS]], +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq <2 x float> [[F32:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt <2 x double> [[F64_FABS]], +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oeq <2 x double> [[F64:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr 
@var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt <2 x half> [[F16_FABS]], +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oeq <2 x half> [[F16:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -156,14 +140,11 @@ define void @denormal_input_preserve_sign_vector_fcmp_olt_smallest_normalized(<2 define void @denormal_input_preserve_sign_vector_fcmp_uge_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_uge_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp uge <2 x float> [[F32_FABS]], +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp une <2 x float> [[F32:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp uge <2 x double> [[F64_FABS]], +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp une <2 x double> [[F64:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp uge <2 x half> [[F16_FABS]], +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp une <2 x half> [[F16:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -183,14 +164,11 @@ define void @denormal_input_preserve_sign_vector_fcmp_uge_smallest_normalized(<2 define void @denormal_input_preserve_sign_vector_fcmp_oge_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_oge_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> 
@llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oge <2 x float> [[F32_FABS]], +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp one <2 x float> [[F32:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oge <2 x double> [[F64_FABS]], +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp one <2 x double> [[F64:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oge <2 x half> [[F16_FABS]], +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp one <2 x half> [[F16:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -210,14 +188,11 @@ define void @denormal_input_preserve_sign_vector_fcmp_oge_smallest_normalized(<2 define void @denormal_input_preserve_sign_vector_fcmp_ult_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { ; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_ult_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ult <2 x float> [[F32_FABS]], +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ueq <2 x float> [[F32:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ult <2 x double> [[F64_FABS]], +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ueq <2 x double> [[F64:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ult <2 x half> 
[[F16_FABS]], +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ueq <2 x half> [[F16:%.*]], zeroinitializer ; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -239,14 +214,11 @@ define void @denormal_input_preserve_sign_vector_fcmp_ult_smallest_normalized(<2 ; https://alive2.llvm.org/ce/z/mpduXS define void @denormal_input_positive_zero_fcmp_olt_smallest_normalized(float %f32, double %f64, half %f16) #1 { ; CHECK-LABEL: @denormal_input_positive_zero_fcmp_olt_smallest_normalized( -; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq float [[F32:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 -; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) -; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oeq double [[F64:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 -; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) -; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oeq half [[F16:%.*]], 0xH0000 ; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 ; CHECK-NEXT: ret void ; @@ -295,8 +267,7 @@ define void @denormal_input_ieee(float %f32, double %f64, half %f16) #2 { ; Only f32 case should fold. 
define void @denormal_input_preserve_sign_f32_only(float %f32, double %f64, half %f16) #3 { ; CHECK-LABEL: @denormal_input_preserve_sign_f32_only( -; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) -; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq float [[F32:%.*]], 0.000000e+00 ; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 ; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) ; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 From 5c0cb75787b9a8a7fd488fb05b8350dc798aee9a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Nov 2022 15:17:23 +0000 Subject: [PATCH 423/516] [X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswell/IceLake There can be a difference for MOVDDUPrr but not the load folded broadcast that is purely on Port23 Fixes an old TODO (inherited from SkylakeServer which was fixed at c7662dc3e52801ec824d8473278fb976107d3e57) Confirmed on Agner + uops.info --- llvm/lib/Target/X86/X86SchedHaswell.td | 8 +------- llvm/lib/Target/X86/X86SchedIceLake.td | 11 +++-------- llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s | 2 +- llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s | 2 +- .../tools/llvm-mca/X86/IceLakeServer/resources-avx1.s | 2 +- .../tools/llvm-mca/X86/IceLakeServer/resources-sse3.s | 2 +- 6 files changed, 8 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 44fc1acf6b742..bd4cbe2469693 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -864,6 +864,7 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> { def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>; def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm", "(V?)MOVSLDUPrm", + "(V?)MOVDDUPrm", "VPBROADCAST(D|Q)rm")>; def HWWriteResGroup0_1 : 
SchedWriteRes<[HWPort23]> { @@ -881,13 +882,6 @@ def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128, def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m", "VPBROADCAST(D|Q)Yrm")>; -def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[HWWriteResGroup0_2], (instregex "(V?)MOVDDUPrm")>; - def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { let Latency = 1; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 43fb6eeacc256..331fafa6d2fe3 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1066,13 +1066,6 @@ def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> { } def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[ICXWriteResGroup58], (instregex "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71? 
- def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -1174,8 +1167,10 @@ def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm, VPBROADCASTQrm, VMOVSHDUPrm, VMOVSLDUPrm, + VMOVDDUPrm, MOVSHDUPrm, - MOVSLDUPrm)>; + MOVSLDUPrm, + MOVDDUPrm)>; def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> { let Latency = 6; diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index ec5c773330c86..ea7d251ffccef 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 1 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s index 6832defc50e59..7085718405a44 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s @@ -58,7 +58,7 @@ mwait # CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s index fa0720f4cef57..383ddac8d16d0 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s +++ 
b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 2 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s index e09b9e0f757b3..4d1942450ec63 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s @@ -58,7 +58,7 @@ mwait # CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2 From cf69895ab31b40fe0d1275a29c4a29283ce327ae Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 2 Nov 2022 14:58:34 +0000 Subject: [PATCH 424/516] [AArch64][SVE2] Add the SVE2.1 BF16 instructions This patch adds the new FEAT_B16B16 feature as well as the assembly/disassembly for all of the B16B16 instructions: bfadd: BFloat16 floating-point add vectors bfsub: BFloat16 floating-point subtract vectors bfmul: BFloat16 floating-point multiply vectors bfclamp: BFloat16 floating-point clamp to minimum/maximum number bfmax: BFloat16 floating-point maximum bfmaxnm: BFloat16 floating-point maximum number bfmin: BFloat16 floating-point minimum bfminnm: BFloat16 floating-point minimum number bfmla: BFloat16 floating-point fused multiply-add vectors bfmls: BFloat16 floating-point fused multiply-subtract vectors The reference can be found 
here: https://developer.arm.com/documentation/ddi0602/2022-09 Differential Revision: https://reviews.llvm.org/D137321 --- .../llvm/Support/AArch64TargetParser.def | 1 + .../llvm/Support/AArch64TargetParser.h | 1 + llvm/lib/Target/AArch64/AArch64.td | 7 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 32 +++++- .../Target/AArch64/AArch64SchedNeoverseN2.td | 3 +- .../AArch64/AsmParser/AArch64AsmParser.cpp | 1 + llvm/lib/Target/AArch64/SVEInstrFormats.td | 46 +++++--- .../MC/AArch64/SVE2p1/bfadd-diagnostics.s | 36 +++++++ llvm/test/MC/AArch64/SVE2p1/bfadd.s | 76 +++++++++++++ .../MC/AArch64/SVE2p1/bfclamp-diagnostics.s | 14 +++ llvm/test/MC/AArch64/SVE2p1/bfclamp.s | 46 ++++++++ .../MC/AArch64/SVE2p1/bfmax-diagnostics.s | 27 +++++ llvm/test/MC/AArch64/SVE2p1/bfmax.s | 53 +++++++++ .../MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s | 27 +++++ llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s | 54 ++++++++++ .../MC/AArch64/SVE2p1/bfmin-diagnostics.s | 27 +++++ llvm/test/MC/AArch64/SVE2p1/bfmin.s | 54 ++++++++++ .../MC/AArch64/SVE2p1/bfminnm-diagnostics.s | 27 +++++ llvm/test/MC/AArch64/SVE2p1/bfminnm.s | 54 ++++++++++ .../MC/AArch64/SVE2p1/bfmla-diagnostics.s | 41 +++++++ llvm/test/MC/AArch64/SVE2p1/bfmla.s | 87 +++++++++++++++ .../MC/AArch64/SVE2p1/bfmls-diagnostics.s | 41 +++++++ llvm/test/MC/AArch64/SVE2p1/bfmls.s | 87 +++++++++++++++ .../MC/AArch64/SVE2p1/bfmul-diagnostics.s | 36 +++++++ llvm/test/MC/AArch64/SVE2p1/bfmul.s | 101 ++++++++++++++++++ .../MC/AArch64/SVE2p1/bfsub-diagnostics.s | 36 +++++++ llvm/test/MC/AArch64/SVE2p1/bfsub.s | 76 +++++++++++++ llvm/unittests/Support/TargetParserTest.cpp | 5 +- 29 files changed, 1080 insertions(+), 18 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfadd-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfadd.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfclamp-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfclamp.s create mode 100644 
llvm/test/MC/AArch64/SVE2p1/bfmax-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmax.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmin-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmin.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfminnm-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfminnm.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmla-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmla.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmls-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmls.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmul-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfmul.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfsub-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/bfsub.s diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index d1366f81cb5b3..e1792a57e35e0 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -129,6 +129,7 @@ AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm") AARCH64_ARCH_EXT_NAME("sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1") +AARCH64_ARCH_EXT_NAME("b16b16", AArch64::AEK_B16B16, "+b16b16", "-b16b16") AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index 
e648ca8aceae0..24ffb9195454a 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -75,6 +75,7 @@ enum ArchExtKind : uint64_t { AEK_SME2 = 1ULL << 43, // FEAT_SME2 AEK_SVE2p1 = 1ULL << 44, // FEAT_SVE2p1 AEK_SME2p1 = 1ULL << 45, // FEAT_SME2p1 + AEK_B16B16 = 1ULL << 46 // FEAT_B16B16 }; enum class ArchKind { diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 981e213186bf8..f28f460ea4e72 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -165,6 +165,9 @@ def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "tru def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true", "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>; +def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true", + "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", []>; + def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -643,7 +646,7 @@ class AArch64Unsupported { list F; } def SVEUnsupported : AArch64Unsupported { let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, - HasSVE2BitPerm, HasSVEorSME, HasSVE2orSME]; + HasSVE2BitPerm, HasSVEorSME, HasSVE2p1, HasSVE2orSME, HasSVE2p1_or_HasSME2p1]; } def PAUnsupported : AArch64Unsupported { @@ -651,7 +654,7 @@ def PAUnsupported : AArch64Unsupported { } def SMEUnsupported : AArch64Unsupported { - let F = [HasSME, HasSMEF64F64, HasSMEI16I64, HasSME2, HasSVE2p1_or_HasSME2]; + let F = [HasSME, HasSMEF64F64, HasSMEI16I64, HasSME2, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1]; } include "AArch64SchedA53.td" diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 0439de8c1c190..b3db70967f14e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ 
-138,6 +138,8 @@ def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; +def HasB16B16 : Predicate<"Subtarget->hasB16B16()">, + AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">; def HasSME : Predicate<"Subtarget->hasSME()">, AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 1549295a72bc4..2a60c32edc9d7 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -689,8 +689,8 @@ let Predicates = [HasSVE] in { } // End HasSVE let Predicates = [HasSVEorSME] in { - defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>; - defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>; + defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", int_aarch64_sve_fmla_lane>; + defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", int_aarch64_sve_fmls_lane>; defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>; defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; @@ -3696,3 +3696,31 @@ defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; } // End HasSVE2p1_or_HasSME2 + +//===----------------------------------------------------------------------===// +// SVE2.1 non-widening BFloat16 to BFloat16 instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasSVE2p1_or_HasSME2p1, HasB16B16] in { +def BFADD_ZZZ : sve_fp_3op_u_zd<0b00, 0b000, 
"bfadd", ZPR16>; +def BFSUB_ZZZ : sve_fp_3op_u_zd<0b00, 0b001, "bfsub", ZPR16>; +def BFMUL_ZZZ : sve_fp_3op_u_zd<0b00, 0b010, "bfmul", ZPR16>; + +def BFMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, 0b00, "bfmla", ZPR16>; +def BFMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b00, 0b01, "bfmls", ZPR16>; + +def BFADD_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0000, "bfadd", ZPR16>; +def BFSUB_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0001, "bfsub", ZPR16>; +def BFMUL_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0010, "bfmul", ZPR16>; +def BFMAXNM_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0100, "bfmaxnm", ZPR16>; +def BFMINNM_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0101, "bfminnm", ZPR16>; +def BFMAX_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0110, "bfmax", ZPR16>; +def BFMIN_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0111, "bfmin", ZPR16>; + +defm BFMLA_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmla", 0b10>; +defm BFMLS_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmls", 0b11>; + +defm BFMUL_ZZZI : sve2p1_fp_bfmul_by_indexed_elem<"bfmul">; + +def BFCLAMP_ZZZ : sve2p1_fclamp<"bfclamp", 0b00, ZPR16>; +} // End HasSVE2p1_or_HasSME2p1, HasB16B16 diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index c472de8df2f8d..21a0e927d7567 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -18,7 +18,8 @@ def NeoverseN2Model : SchedMachineModel { let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 
let CompleteModel = 1; - list UnsupportedFeatures = SMEUnsupported.F; + list UnsupportedFeatures = !listconcat(SMEUnsupported.F, + [HasSVE2p1]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index e4b2c09ec8d3a..cc6bd90b69568 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3476,6 +3476,7 @@ static const struct Extension { {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}}, {"sve2p1", {AArch64::FeatureSVE2p1}}, + {"b16b16", {AArch64::FeatureB16B16}}, {"ls64", {AArch64::FeatureLS64}}, {"xs", {AArch64::FeatureXS}}, {"pauth", {AArch64::FeaturePAuth}}, diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 01ef367fef752..f91e5243fca4c 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2105,19 +2105,18 @@ class sve_fp_3op_p_zds_a sz, bits<2> opc, string asm, ZPRRegOp zprty> let Constraints = "$Zda = $_Zda"; let ElementSize = zprty.ElementSize; + let DestructiveInstType = DestructiveTernaryCommWithRev; } multiclass sve_fp_3op_p_zds_a opc, string asm, string Ps, SDPatternOperator op, string revname, bit isReverseInstr=0> { - let DestructiveInstType = DestructiveTernaryCommWithRev in { def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>, SVEPseudo2Instr, SVEInstr2Rev; def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>, SVEPseudo2Instr, SVEInstr2Rev; def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>, SVEPseudo2Instr, SVEInstr2Rev; - } def : SVE_4_Op_Pat(NAME # _H)>; def : SVE_4_Op_Pat(NAME # _S)>; @@ -2173,7 +2172,7 @@ multiclass sve_fp_3op_p_zds_zx { // SVE Floating Point Multiply-Add - Indexed Group //===----------------------------------------------------------------------===// 
-class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, +class sve_fp_fma_by_indexed_elem sz, bits<2> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2, Operand itype> : I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty1:$Zn, zprty2:$Zm, itype:$iop), @@ -2183,8 +2182,8 @@ class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, let Inst{31-24} = 0b01100100; let Inst{23-22} = sz; let Inst{21} = 0b1; - let Inst{15-11} = 0; - let Inst{10} = opc; + let Inst{15-12} = 0b0000; + let Inst{11-10} = opc; let Inst{9-5} = Zn; let Inst{4-0} = Zda; @@ -2193,7 +2192,18 @@ class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, let ElementSize = ElementSizeNone; } -multiclass sve_fp_fma_by_indexed_elem opc> { + def NAME : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, + VectorIndexH32b> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let Inst{18-16} = Zm; + } +} + +multiclass sve_fp_fma_by_indexed_elem opc, string asm, SDPatternOperator op> { def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; @@ -2228,8 +2238,8 @@ multiclass sve_fp_fma_by_indexed_elem sz, string asm, ZPRRegOp zprty, - ZPRRegOp zprty2, Operand itype> +class sve_fp_fmul_by_indexed_elem sz, bit o2, string asm, ZPRRegOp zprty, + ZPRRegOp zprty2, Operand itype> : I<(outs zprty:$Zd), (ins zprty:$Zn, zprty2:$Zm, itype:$iop), asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> { bits<5> Zd; @@ -2237,26 +2247,38 @@ class sve_fp_fmul_by_indexed_elem sz, string asm, ZPRRegOp zprty, let Inst{31-24} = 0b01100100; let Inst{23-22} = sz; let Inst{21} = 0b1; - let Inst{15-10} = 0b001000; + let Inst{15-12} = 0b0010; + let Inst{11} = o2; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } +multiclass sve2p1_fp_bfmul_by_indexed_elem { + def NAME : sve_fp_fmul_by_indexed_elem<{0, ?}, 0b1, asm, ZPR16, ZPR3b16, VectorIndexH32b> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let 
Inst{18-16} = Zm; + } +} + multiclass sve_fp_fmul_by_indexed_elem { - def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> { + def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, 0b0, asm, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; bits<3> iop; let Inst{22} = iop{2}; let Inst{20-19} = iop{1-0}; let Inst{18-16} = Zm; } - def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> { + def _S : sve_fp_fmul_by_indexed_elem<0b10, 0b0, asm, ZPR32, ZPR3b32, VectorIndexS32b> { bits<3> Zm; bits<2> iop; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> { + def _D : sve_fp_fmul_by_indexed_elem<0b11, 0b0, asm, ZPR64, ZPR4b64, VectorIndexD32b> { bits<4> Zm; bit iop; let Inst{20} = iop; diff --git a/llvm/test/MC/AArch64/SVE2p1/bfadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfadd-diagnostics.s new file mode 100644 index 0000000000000..1ead9d28277aa --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfadd-diagnostics.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfadd z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfadd z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfadd z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfadd z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfadd z23.h, p1/m, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfadd z23.h, p1/m, z23.s, z13.s +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +bfadd z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfadd z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfadd z23.h, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfadd z23.h, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfadd.s b/llvm/test/MC/AArch64/SVE2p1/bfadd.s new file mode 100644 index 0000000000000..1021df12fc050 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfadd.s @@ -0,0 +1,76 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +movprfx z23.h, p3/m, z31.h +bfadd z23.h, p3/m, z23.h, z13.h // 01100101-00000000-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfadd z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// 
CHECK-UNKNOWN: 65008db7 + +movprfx z23, z31 +bfadd z23.h, p3/m, z23.h, z13.h // 01100101-00000000-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfadd z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65008db7 + +bfadd z0.h, p0/m, z0.h, z0.h // 01100101-00000000-10000000-00000000 +// CHECK-INST: bfadd z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65008000 + +bfadd z21.h, p5/m, z21.h, z10.h // 01100101-00000000-10010101-01010101 +// CHECK-INST: bfadd z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65009555 + +bfadd z23.h, p3/m, z23.h, z13.h // 01100101-00000000-10001101-10110111 +// CHECK-INST: bfadd z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65008db7 + +bfadd z31.h, p7/m, z31.h, z31.h // 01100101-00000000-10011111-11111111 +// CHECK-INST: bfadd z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65009fff + +bfadd z0.h, z0.h, z0.h // 01100101-00000000-00000000-00000000 +// CHECK-INST: bfadd z0.h, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x00,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65000000 + +bfadd z21.h, z10.h, z21.h // 01100101-00010101-00000001-01010101 +// CHECK-INST: bfadd z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x01,0x15,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65150155 + +bfadd z23.h, z13.h, z8.h // 01100101-00001000-00000001-10110111 +// CHECK-INST: bfadd z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x01,0x08,0x65] +// CHECK-ERROR: 
instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 650801b7 + +bfadd z31.h, z31.h, z31.h // 01100101-00011111-00000011-11111111 +// CHECK-INST: bfadd z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x03,0x1f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 651f03ff diff --git a/llvm/test/MC/AArch64/SVE2p1/bfclamp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfclamp-diagnostics.s new file mode 100644 index 0000000000000..b18108fcdf08e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfclamp-diagnostics.s @@ -0,0 +1,14 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfclamp z23.h, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfclamp z23.h, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfclamp z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfclamp z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfclamp.s b/llvm/test/MC/AArch64/SVE2p1/bfclamp.s new file mode 100644 index 0000000000000..d7b85edb1730e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfclamp.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23, z31 +bfclamp z23.h, z13.h, z8.h // 01100100-00101000-00100101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfclamp z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x25,0x28,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 642825b7 + +bfclamp z0.h, z0.h, z0.h // 01100100-00100000-00100100-00000000 +// CHECK-INST: bfclamp z0.h, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x24,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64202400 + +bfclamp z21.h, z10.h, z21.h // 01100100-00110101-00100101-01010101 +// CHECK-INST: bfclamp z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x25,0x35,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64352555 + +bfclamp z23.h, z13.h, z8.h // 01100100-00101000-00100101-10110111 +// CHECK-INST: bfclamp z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x25,0x28,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 642825b7 + +bfclamp z31.h, z31.h, z31.h // 01100100-00111111-00100111-11111111 +// CHECK-INST: bfclamp z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x27,0x3f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 643f27ff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmax-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmax-diagnostics.s new file mode 100644 index 0000000000000..f7e30713e7d52 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmax-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid 
predicate register + +bfmax z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmax z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmax z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmax z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmax z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmax z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmax z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmax z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmax.s b/llvm/test/MC/AArch64/SVE2p1/bfmax.s new file mode 100644 index 0000000000000..cd67abc498f3b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmax.s @@ -0,0 +1,53 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: 
| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfmax z23.h, p3/m, z23.h, z13.h // 01100101-00000110-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmax z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068db7 + +movprfx z23, z31 +bfmax z23.h, p3/m, z23.h, z13.h // 01100101-00000110-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmax z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068db7 + +bfmax z0.h, p0/m, z0.h, z0.h // 01100101-00000110-10000000-00000000 +// CHECK-INST: bfmax z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068000 + +bfmax z21.h, p5/m, z21.h, z10.h // 01100101-00000110-10010101-01010101 +// CHECK-INST: bfmax z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65069555 + +bfmax z23.h, p3/m, z23.h, z13.h // 01100101-00000110-10001101-10110111 +// CHECK-INST: bfmax z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068db7 + +bfmax z31.h, p7/m, z31.h, z31.h // 01100101-00000110-10011111-11111111 +// CHECK-INST: bfmax z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65069fff diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s new file mode 100644 index 0000000000000..220b66b435ed4 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfmaxnm z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmaxnm z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmaxnm z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmaxnm z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmaxnm z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmaxnm z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmaxnm z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmaxnm z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s new file mode 100644 index 0000000000000..83669ebc42b1f --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s @@ -0,0 +1,54 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfmaxnm z23.h, p3/m, z23.h, z13.h // 01100101-00000100-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048db7 + +movprfx z23, z31 +bfmaxnm z23.h, p3/m, z23.h, z13.h // 01100101-00000100-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048db7 + +bfmaxnm z0.h, p0/m, z0.h, z0.h // 01100101-00000100-10000000-00000000 +// CHECK-INST: bfmaxnm z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048000 + +bfmaxnm z21.h, p5/m, z21.h, z10.h // 01100101-00000100-10010101-01010101 +// CHECK-INST: bfmaxnm z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65049555 + +bfmaxnm z23.h, p3/m, z23.h, z13.h // 01100101-00000100-10001101-10110111 +// CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048db7 + +bfmaxnm z31.h, p7/m, z31.h, z31.h // 01100101-00000100-10011111-11111111 +// CHECK-INST: bfmaxnm z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65049fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmin-diagnostics.s 
b/llvm/test/MC/AArch64/SVE2p1/bfmin-diagnostics.s new file mode 100644 index 0000000000000..a7f8be225fac9 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmin-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfmin z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmin z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmin z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmin z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmin z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmin z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmin z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmin z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmin.s b/llvm/test/MC/AArch64/SVE2p1/bfmin.s new file mode 100644 index 0000000000000..1bb3a0e6f1f26 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmin.s @@ -0,0 +1,54 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: 
llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfmin z23.h, p3/m, z23.h, z13.h // 01100101-00000111-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmin z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078db7 + +movprfx z23, z31 +bfmin z23.h, p3/m, z23.h, z13.h // 01100101-00000111-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmin z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078db7 + +bfmin z0.h, p0/m, z0.h, z0.h // 01100101-00000111-10000000-00000000 +// CHECK-INST: bfmin z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078000 + +bfmin z21.h, p5/m, z21.h, z10.h // 01100101-00000111-10010101-01010101 +// CHECK-INST: bfmin z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65079555 + +bfmin z23.h, p3/m, z23.h, z13.h // 01100101-00000111-10001101-10110111 +// CHECK-INST: bfmin z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078db7 + +bfmin z31.h, p7/m, z31.h, z31.h // 01100101-00000111-10011111-11111111 +// CHECK-INST: bfmin z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: 
[0xff,0x9f,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65079fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfminnm-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfminnm-diagnostics.s new file mode 100644 index 0000000000000..68c4211afa627 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfminnm-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfminnm z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfminnm z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfminnm z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfminnm z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfminnm z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfminnm z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfminnm z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfminnm z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfminnm.s b/llvm/test/MC/AArch64/SVE2p1/bfminnm.s new file mode 100644 index 0000000000000..9f444c7ac26ae --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfminnm.s @@ -0,0 +1,54 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfminnm z23.h, p3/m, z23.h, z13.h // 01100101-00000101-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058db7 + +movprfx z23, z31 +bfminnm z23.h, p3/m, z23.h, z13.h // 01100101-00000101-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058db7 + +bfminnm z0.h, p0/m, z0.h, z0.h // 01100101-00000101-10000000-00000000 +// CHECK-INST: bfminnm z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058000 + +bfminnm z21.h, p5/m, z21.h, z10.h // 01100101-00000101-10010101-01010101 +// CHECK-INST: bfminnm z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65059555 + +bfminnm z23.h, p3/m, z23.h, z13.h // 01100101-00000101-10001101-10110111 +// CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x05,0x65] +// 
CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058db7 + +bfminnm z31.h, p7/m, z31.h, z31.h // 01100101-00000101-10011111-11111111 +// CHECK-INST: bfminnm z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65059fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmla-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmla-diagnostics.s new file mode 100644 index 0000000000000..035f2898e2b92 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmla-diagnostics.s @@ -0,0 +1,41 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +bfmla z0.h, z0.h, z0.h[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: bfmla z0.h, z0.h, z0.h[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmla z0.h, z0.h, z0.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. 
+// CHECK-NEXT: bfmla z0.h, z0.h, z0.h[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmla z0.h, z0.h, z8.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h +// CHECK-NEXT: bfmla z0.h, z0.h, z8.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmla z0.h, z0.s, z0.s[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmla z0.h, z0.s, z0.s[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmla z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmla z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfmla z23.h, z12.h, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: bfmla z23.h, z12.h, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmla.s b/llvm/test/MC/AArch64/SVE2p1/bfmla.s new file mode 100644 index 0000000000000..ff257830a13da --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmla.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23, z31 +bfmla z23.h, z13.h, z0.h[5] // 01100100-01101000-00001001-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmla z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x09,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 646809b7 + +bfmla z0.h, z0.h, z0.h[0] // 01100100-00100000-00001000-00000000 +// CHECK-INST: bfmla z0.h, z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x08,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64200800 + +bfmla z21.h, z10.h, z5.h[6] // 01100100-01110101-00001001-01010101 +// CHECK-INST: bfmla z21.h, z10.h, z5.h[6] +// CHECK-ENCODING: [0x55,0x09,0x75,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64750955 + +bfmla z23.h, z13.h, z0.h[5] // 01100100-01101000-00001001-10110111 +// CHECK-INST: bfmla z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x09,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 646809b7 + +bfmla z31.h, z31.h, z7.h[7] // 01100100-01111111-00001011-11111111 +// CHECK-INST: bfmla z31.h, z31.h, z7.h[7] +// CHECK-ENCODING: [0xff,0x0b,0x7f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 647f0bff + + +movprfx z23.h, p3/m, z31.h +bfmla z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmla z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x0d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65280db7 + +movprfx z23, z31 +bfmla z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00001101-10110111 +// CHECK-INST: movprfx z23, 
z31 +// CHECK-INST: bfmla z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x0d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65280db7 + +bfmla z0.h, p0/m, z0.h, z0.h // 01100101-00100000-00000000-00000000 +// CHECK-INST: bfmla z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x00,0x20,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65200000 + +bfmla z21.h, p5/m, z10.h, z21.h // 01100101-00110101-00010101-01010101 +// CHECK-INST: bfmla z21.h, p5/m, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x15,0x35,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65351555 + +bfmla z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00001101-10110111 +// CHECK-INST: bfmla z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x0d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65280db7 + +bfmla z31.h, p7/m, z31.h, z31.h // 01100101-00111111-00011111-11111111 +// CHECK-INST: bfmla z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x1f,0x3f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 653f1fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmls-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmls-diagnostics.s new file mode 100644 index 0000000000000..cbc7efe9df7aa --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmls-diagnostics.s @@ -0,0 +1,41 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +bfmls z0.h, z0.h, z0.h[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: bfmls z0.h, z0.h, z0.h[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmls z0.h, z0.h, z0.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. 
+// CHECK-NEXT: bfmls z0.h, z0.h, z0.h[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmls z0.h, z0.h, z8.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h +// CHECK-NEXT: bfmls z0.h, z0.h, z8.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmls z0.h, z0.s, z0.s[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmls z0.h, z0.s, z0.s[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmls z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmls z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfmls z23.h, z12.h, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: bfmls z23.h, z12.h, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmls.s b/llvm/test/MC/AArch64/SVE2p1/bfmls.s new file mode 100644 index 0000000000000..c153b56b9586b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmls.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23, z31 +bfmls z23.h, z13.h, z0.h[5] // 01100100-01101000-00001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmls z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x0d,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64680db7 + +bfmls z0.h, z0.h, z0.h[0] // 01100100-00100000-00001100-00000000 +// CHECK-INST: bfmls z0.h, z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x0c,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64200c00 + +bfmls z21.h, z10.h, z5.h[6] // 01100100-01110101-00001101-01010101 +// CHECK-INST: bfmls z21.h, z10.h, z5.h[6] +// CHECK-ENCODING: [0x55,0x0d,0x75,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64750d55 + +bfmls z23.h, z13.h, z0.h[5] // 01100100-01101000-00001101-10110111 +// CHECK-INST: bfmls z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x0d,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64680db7 + +bfmls z31.h, z31.h, z7.h[7] // 01100100-01111111-00001111-11111111 +// CHECK-INST: bfmls z31.h, z31.h, z7.h[7] +// CHECK-ENCODING: [0xff,0x0f,0x7f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 647f0fff + + +movprfx z23.h, p3/m, z31.h +bfmls z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00101101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmls z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x2d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65282db7 + +movprfx z23, z31 +bfmls z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00101101-10110111 +// CHECK-INST: movprfx z23, 
z31 +// CHECK-INST: bfmls z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x2d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65282db7 + +bfmls z0.h, p0/m, z0.h, z0.h // 01100101-00100000-00100000-00000000 +// CHECK-INST: bfmls z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x20,0x20,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65202000 + +bfmls z21.h, p5/m, z10.h, z21.h // 01100101-00110101-00110101-01010101 +// CHECK-INST: bfmls z21.h, p5/m, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x35,0x35,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65353555 + +bfmls z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00101101-10110111 +// CHECK-INST: bfmls z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x2d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65282db7 + +bfmls z31.h, p7/m, z31.h, z31.h // 01100101-00111111-00111111-11111111 +// CHECK-INST: bfmls z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x3f,0x3f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 653f3fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmul-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmul-diagnostics.s new file mode 100644 index 0000000000000..51adae0689603 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmul-diagnostics.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfmul z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmul z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmul z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid 
operand for instruction +// CHECK-NEXT: bfmul z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmul z23.h, p1/m, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmul z23.h, p1/m, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmul z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmul z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfmul z23.h, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfmul z23.h, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmul.s b/llvm/test/MC/AArch64/SVE2p1/bfmul.s new file mode 100644 index 0000000000000..e0b93bcbb1035 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmul.s @@ -0,0 +1,101 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble 
-show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +bfmul z0.h, z0.h, z0.h[0] // 01100100-00100000-00101000-00000000 +// CHECK-INST: bfmul z0.h, z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x28,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64202800 + +bfmul z21.h, z10.h, z5.h[6] // 01100100-01110101-00101001-01010101 +// CHECK-INST: bfmul z21.h, z10.h, z5.h[6] +// CHECK-ENCODING: [0x55,0x29,0x75,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64752955 + +bfmul z23.h, z13.h, z0.h[5] // 01100100-01101000-00101001-10110111 +// CHECK-INST: bfmul z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x29,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 646829b7 + +bfmul z31.h, z31.h, z7.h[7] // 01100100-01111111-00101011-11111111 +// CHECK-INST: bfmul z31.h, z31.h, z7.h[7] +// CHECK-ENCODING: [0xff,0x2b,0x7f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 647f2bff + +movprfx z23.h, p3/m, z31.h +bfmul z23.h, p3/m, z23.h, z13.h // 01100101-00000010-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmul z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028db7 + +movprfx z23, z31 +bfmul z23.h, p3/m, z23.h, z13.h // 01100101-00000010-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmul z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028db7 + +bfmul z0.h, p0/m, z0.h, z0.h // 01100101-00000010-10000000-00000000 +// CHECK-INST: bfmul z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028000 + +bfmul z21.h, p5/m, z21.h, z10.h // 
01100101-00000010-10010101-01010101 +// CHECK-INST: bfmul z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65029555 + +bfmul z23.h, p3/m, z23.h, z13.h // 01100101-00000010-10001101-10110111 +// CHECK-INST: bfmul z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028db7 + +bfmul z31.h, p7/m, z31.h, z31.h // 01100101-00000010-10011111-11111111 +// CHECK-INST: bfmul z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65029fff + +bfmul z0.h, z0.h, z0.h // 01100101-00000000-00001000-00000000 +// CHECK-INST: bfmul z0.h, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x08,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65000800 + +bfmul z21.h, z10.h, z21.h // 01100101-00010101-00001001-01010101 +// CHECK-INST: bfmul z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x09,0x15,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65150955 + +bfmul z23.h, z13.h, z8.h // 01100101-00001000-00001001-10110111 +// CHECK-INST: bfmul z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x09,0x08,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 650809b7 + +bfmul z31.h, z31.h, z31.h // 01100101-00011111-00001011-11111111 +// CHECK-INST: bfmul z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x0b,0x1f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 651f0bff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfsub-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfsub-diagnostics.s new file mode 100644 index 0000000000000..86cb32075f501 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfsub-diagnostics.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfsub z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfsub z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfsub z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfsub z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfsub z23.h, p1/m, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfsub z23.h, p1/m, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfsub z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfsub z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfsub z23.h, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfsub z23.h, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfsub.s b/llvm/test/MC/AArch64/SVE2p1/bfsub.s new file mode 100644 index 0000000000000..42cb6772c3a51 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfsub.s @@ -0,0 +1,76 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < 
%s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +movprfx z23.h, p3/m, z31.h +bfsub z23.h, p3/m, z23.h, z13.h // 01100101-00000001-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfsub z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018db7 + +movprfx z23, z31 +bfsub z23.h, p3/m, z23.h, z13.h // 01100101-00000001-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfsub z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018db7 + +bfsub z0.h, p0/m, z0.h, z0.h // 01100101-00000001-10000000-00000000 +// CHECK-INST: bfsub z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018000 + +bfsub z21.h, p5/m, z21.h, z10.h // 01100101-00000001-10010101-01010101 +// CHECK-INST: bfsub z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65019555 + +bfsub z23.h, p3/m, z23.h, z13.h // 01100101-00000001-10001101-10110111 +// CHECK-INST: bfsub z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018db7 + +bfsub z31.h, p7/m, z31.h, z31.h // 
01100101-00000001-10011111-11111111 +// CHECK-INST: bfsub z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65019fff + +bfsub z0.h, z0.h, z0.h // 01100101-00000000-00000100-00000000 +// CHECK-INST: bfsub z0.h, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x04,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65000400 + +bfsub z21.h, z10.h, z21.h // 01100101-00010101-00000101-01010101 +// CHECK-INST: bfsub z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x05,0x15,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65150555 + +bfsub z23.h, z13.h, z8.h // 01100101-00001000-00000101-10110111 +// CHECK-INST: bfsub z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x05,0x08,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 650805b7 + +bfsub z31.h, z31.h, z31.h // 01100101-00011111-00000111-11111111 +// CHECK-INST: bfsub z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x07,0x1f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 651f07ff diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 904a7316c5046..dd4e5fcd0fa22 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -1519,7 +1519,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_BRBE, AArch64::AEK_PAUTH, AArch64::AEK_FLAGM, AArch64::AEK_SME, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64, AArch64::AEK_SME2, AArch64::AEK_HBC, AArch64::AEK_MOPS, - AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1, AArch64::AEK_SME2p1}; + AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1, AArch64::AEK_SME2p1, + AArch64::AEK_B16B16}; std::vector Features; @@ -1559,6 +1560,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3")); 
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1")); + EXPECT_TRUE(llvm::is_contained(Features, "+b16b16")); EXPECT_TRUE(llvm::is_contained(Features, "+rcpc")); EXPECT_TRUE(llvm::is_contained(Features, "+rand")); EXPECT_TRUE(llvm::is_contained(Features, "+mte")); @@ -1642,6 +1644,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"}, {"sve2-sha3", "nosve2-sha3", "+sve2-sha3", "-sve2-sha3"}, {"sve2p1", "nosve2p1", "+sve2p1", "-sve2p1"}, + {"b16b16", "nob16b16", "+b16b16", "-b16b16"}, {"sve2-bitperm", "nosve2-bitperm", "+sve2-bitperm", "-sve2-bitperm"}, {"dotprod", "nodotprod", "+dotprod", "-dotprod"}, {"rcpc", "norcpc", "+rcpc", "-rcpc"}, From 9b800bf79d9d2fa18ed5be891346155238015515 Mon Sep 17 00:00:00 2001 From: bixia1 Date: Sun, 6 Nov 2022 18:46:49 -0800 Subject: [PATCH 425/516] [mlir][sparse] Improve the non-stable sort implementation. Replace the quick sort partition method with one that is more similar to the method used by C++ std quick sort. This improves the runtime for sorting sk_2005.mtx by more than 10x. 
Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D137290 --- .../Transforms/SparseBufferRewriting.cpp | 340 ++++++++++++------ .../SparseTensor/buffer_rewriting.mlir | 63 ++-- .../SparseTensor/CPU/sparse_rewrite_sort.mlir | 2 +- 3 files changed, 267 insertions(+), 138 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp index 929d4a4ddf1f3..0af92a656d848 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp @@ -33,8 +33,8 @@ static constexpr uint64_t loIdx = 0; static constexpr uint64_t hiIdx = 1; static constexpr uint64_t xStartIdx = 2; -static constexpr const char kMaySwapFuncNamePrefix[] = "_sparse_may_swap_"; static constexpr const char kLessThanFuncNamePrefix[] = "_sparse_less_than_"; +static constexpr const char kCompareEqFuncNamePrefix[] = "_sparse_compare_eq_"; static constexpr const char kPartitionFuncNamePrefix[] = "_sparse_partition_"; static constexpr const char kBinarySearchFuncNamePrefix[] = "_sparse_binary_search_"; @@ -90,11 +90,10 @@ getMangledSortHelperFunc(OpBuilder &builder, func::FuncOp insertPoint, return result; } -/// Creates a function for swapping the values in index i and j for all the +/// Creates a code block for swapping the values in index i and j for all the /// buffers. // -// The generate IR corresponds to this C like algorithm: -// if (i != j) { +// The generated IR corresponds to this C like algorithm: // swap(x0[i], x0[j]); // swap(x1[i], x1[j]); // ... @@ -102,36 +101,90 @@ getMangledSortHelperFunc(OpBuilder &builder, func::FuncOp insertPoint, // swap(y0[i], y0[j]); // ... 
// swap(yn[i], yn[j]); -// } -static void createMaySwapFunc(OpBuilder &builder, ModuleOp unused, - func::FuncOp func, size_t dim) { +static void createSwap(OpBuilder &builder, Location loc, ValueRange args) { + Value i = args[0]; + Value j = args[1]; + for (auto arg : args.drop_front(xStartIdx)) { + Value vi = builder.create(loc, arg, i); + Value vj = builder.create(loc, arg, j); + builder.create(loc, vj, arg, i); + builder.create(loc, vi, arg, j); + } +} + +/// Creates a function to compare all the (xs[i], xs[j]) pairs. The method to +/// compare each pair is created via `compareBuilder`. +static void createCompareFuncImplementation( + OpBuilder &builder, ModuleOp unused, func::FuncOp func, size_t dim, + function_ref + compareBuilder) { OpBuilder::InsertionGuard insertionGuard(builder); Block *entryBlock = func.addEntryBlock(); builder.setInsertionPointToStart(entryBlock); - Location loc = func.getLoc(); ValueRange args = entryBlock->getArguments(); - Value i = args[0]; - Value j = args[1]; + + scf::IfOp topIfOp; + for (const auto &item : llvm::enumerate(args.slice(xStartIdx, dim))) { + scf::IfOp ifOp = compareBuilder(builder, loc, args[0], args[1], + item.value(), (item.index() == dim - 1)); + if (item.index() == 0) { + topIfOp = ifOp; + } else { + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPointAfter(ifOp); + builder.create(loc, ifOp.getResult(0)); + } + } + + builder.setInsertionPointAfter(topIfOp); + builder.create(loc, topIfOp.getResult(0)); +} + +/// Generates an if-statement to compare whether x[i] is equal to x[j].
+static scf::IfOp createEqCompare(OpBuilder &builder, Location loc, Value i, + Value j, Value x, bool isLastDim) { + Value f = constantI1(builder, loc, false); + Value t = constantI1(builder, loc, true); + Value vi = builder.create(loc, x, i); + Value vj = builder.create(loc, x, j); + Value cond = - builder.create(loc, arith::CmpIPredicate::ne, i, j); - scf::IfOp ifOp = builder.create(loc, cond, /*else=*/false); + builder.create(loc, arith::CmpIPredicate::eq, vi, vj); + scf::IfOp ifOp = + builder.create(loc, f.getType(), cond, /*else=*/true); - // If i!=j swap values in the buffers. + // x[i] != x[j]: + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, f); + + // x[i] == x[j]: builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - for (auto arg : args.drop_front(xStartIdx)) { - Value vi = builder.create(loc, arg, i); - Value vj = builder.create(loc, arg, j); - builder.create(loc, vj, arg, i); - builder.create(loc, vi, arg, j); + if (isLastDim == 1) { + // Finish checking all dimensions. + builder.create(loc, t); } - builder.setInsertionPointAfter(ifOp); - builder.create(loc); + return ifOp; +} + +/// Creates a function to compare whether xs[i] is equal to xs[j]. +// +// The generated IR corresponds to this C like algorithm: +// if (x0[i] != x0[j]) +// return false; +// else +// if (x1[i] != x1[j]) +// return false; +// else if (x2[i] != x2[j]) +// and so on ... +static void createEqCompareFunc(OpBuilder &builder, ModuleOp unused, + func::FuncOp func, size_t dim) { + createCompareFuncImplementation(builder, unused, func, dim, createEqCompare); } -/// Generates an if-statement to compare x[i] and x[j]. +/// Generates an if-statement to compare whether x[i] is less than x[j].
static scf::IfOp createLessThanCompare(OpBuilder &builder, Location loc, Value i, Value j, Value x, bool isLastDim) { @@ -172,8 +225,7 @@ static scf::IfOp createLessThanCompare(OpBuilder &builder, Location loc, return ifOp; } -/// Creates a function to compare the xs values in index i and j for all the -/// dimensions. The function returns true iff xs[i] < xs[j]. +/// Creates a function to compare whether xs[i] is less than xs[j]. // // The generate IR corresponds to this C like algorithm: // if (x0[i] < x0[j]) @@ -187,29 +239,8 @@ static scf::IfOp createLessThanCompare(OpBuilder &builder, Location loc, // and so on ... static void createLessThanFunc(OpBuilder &builder, ModuleOp unused, func::FuncOp func, size_t dim) { - OpBuilder::InsertionGuard insertionGuard(builder); - - Block *entryBlock = func.addEntryBlock(); - builder.setInsertionPointToStart(entryBlock); - Location loc = func.getLoc(); - ValueRange args = entryBlock->getArguments(); - - scf::IfOp topIfOp; - for (const auto &item : llvm::enumerate(args.slice(xStartIdx, dim))) { - scf::IfOp ifOp = - createLessThanCompare(builder, loc, args[0], args[1], item.value(), - (item.index() == dim - 1)); - if (item.index() == 0) { - topIfOp = ifOp; - } else { - OpBuilder::InsertionGuard insertionGuard(builder); - builder.setInsertionPointAfter(ifOp); - builder.create(loc, ifOp.getResult(0)); - } - } - - builder.setInsertionPointAfter(topIfOp); - builder.create(loc, topIfOp.getResult(0)); + createCompareFuncImplementation(builder, unused, func, dim, + createLessThanCompare); } /// Creates a function to use a binary search to find the insertion point for @@ -285,23 +316,94 @@ static void createBinarySearchFunc(OpBuilder &builder, ModuleOp module, builder.create(loc, whileOp.getResult(0)); } +/// Creates code to advance i in a loop based on xs[p] as follows: +/// while (xs[i] < xs[p]) i += step (step > 0) +/// or +/// while (xs[i] > xs[p]) i += step (step < 0) +/// The routine returns i as well as a boolean value to 
indicate whether +/// xs[i] == xs[p]. +static std::pair +createScanLoop(OpBuilder &builder, ModuleOp module, func::FuncOp func, + ValueRange xs, Value i, Value p, size_t dim, int step) { + Location loc = func.getLoc(); + scf::WhileOp whileOp = + builder.create(loc, TypeRange{i.getType()}, ValueRange{i}); + + Block *before = + builder.createBlock(&whileOp.getBefore(), {}, {i.getType()}, {loc}); + builder.setInsertionPointToEnd(before); + SmallVector compareOperands; + if (step > 0) { + compareOperands.push_back(before->getArgument(0)); + compareOperands.push_back(p); + } else { + assert(step < 0); + compareOperands.push_back(p); + compareOperands.push_back(before->getArgument(0)); + } + compareOperands.append(xs.begin(), xs.end()); + MLIRContext *context = module.getContext(); + Type i1Type = IntegerType::get(context, 1, IntegerType::Signless); + FlatSymbolRefAttr lessThanFunc = + getMangledSortHelperFunc(builder, func, {i1Type}, kLessThanFuncNamePrefix, + dim, compareOperands, createLessThanFunc); + Value cond = builder + .create(loc, lessThanFunc, TypeRange{i1Type}, + compareOperands) + .getResult(0); + builder.create(loc, cond, before->getArguments()); + + Block *after = + builder.createBlock(&whileOp.getAfter(), {}, {i.getType()}, {loc}); + builder.setInsertionPointToEnd(after); + Value cs = constantIndex(builder, loc, step); + i = builder.create(loc, after->getArgument(0), cs); + builder.create(loc, ValueRange{i}); + i = whileOp.getResult(0); + + builder.setInsertionPointAfter(whileOp); + compareOperands[0] = i; + compareOperands[1] = p; + FlatSymbolRefAttr compareEqFunc = getMangledSortHelperFunc( + builder, func, {i1Type}, kCompareEqFuncNamePrefix, dim, compareOperands, + createEqCompareFunc); + Value compareEq = + builder + .create(loc, compareEqFunc, TypeRange{i1Type}, + compareOperands) + .getResult(0); + + return std::make_pair(whileOp.getResult(0), compareEq); +} + /// Creates a function to perform quick sort partition on the values in the /// range of 
index [lo, hi), assuming lo < hi. // // The generated IR corresponds to this C like algorithm: -// int partition(lo, hi, data) { -// pivot = data[hi - 1]; -// i = (lo – 1) // RHS of the pivot found so far. -// for (j = lo; j < hi - 1; j++){ -// if (data[j] < pivot){ -// i++; -// swap data[i] and data[j] +// int partition(lo, hi, xs) { +// p = (lo+hi)/2 // pivot index +// i = lo +// j = hi-1 +// while (i < j) do { +// while (xs[i] < xs[p]) i ++; +// i_eq = (xs[i] == xs[p]); +// while (xs[j] > xs[p]) j --; +// j_eq = (xs[j] == xs[p]); +// if (i < j) { +// swap(xs[i], xs[j]) +// if (i == p) { +// p = j; +// } else if (j == p) { +// p = i; +// } +// if (i_eq && j_eq) { +// ++i; +// --j; +// } // } // } -// i++ -// swap data[i] and data[hi-1]) -// return i -// } +// return p +// } static void createPartitionFunc(OpBuilder &builder, ModuleOp module, func::FuncOp func, size_t dim) { OpBuilder::InsertionGuard insertionGuard(builder); @@ -309,60 +411,96 @@ static void createPartitionFunc(OpBuilder &builder, ModuleOp module, Block *entryBlock = func.addEntryBlock(); builder.setInsertionPointToStart(entryBlock); - MLIRContext *context = module.getContext(); Location loc = func.getLoc(); ValueRange args = entryBlock->getArguments(); Value lo = args[loIdx]; + Value hi = args[hiIdx]; + Value sum = builder.create(loc, lo, hi); Value c1 = constantIndex(builder, loc, 1); - Value i = builder.create(loc, lo, c1); - Value him1 = builder.create(loc, args[hiIdx], c1); - scf::ForOp forOp = - builder.create(loc, lo, him1, c1, ValueRange{i}); - - // Start the for-stmt body. 
- builder.setInsertionPointToStart(forOp.getBody()); - Value j = forOp.getInductionVar(); - SmallVector compareOperands{j, him1}; - ValueRange xs = args.slice(xStartIdx, dim); - compareOperands.append(xs.begin(), xs.end()); - Type i1Type = IntegerType::get(context, 1, IntegerType::Signless); - FlatSymbolRefAttr lessThanFunc = - getMangledSortHelperFunc(builder, func, {i1Type}, kLessThanFuncNamePrefix, - dim, compareOperands, createLessThanFunc); - Value cond = builder - .create(loc, lessThanFunc, TypeRange{i1Type}, - compareOperands) - .getResult(0); - scf::IfOp ifOp = - builder.create(loc, i.getType(), cond, /*else=*/true); + Value p = builder.create(loc, sum, c1); + + Value i = lo; + Value j = builder.create(loc, hi, c1); + SmallVector operands{i, j, p}; + SmallVector types{i.getType(), j.getType(), p.getType()}; + scf::WhileOp whileOp = builder.create(loc, types, operands); + + // The before-region of the WhileOp. + Block *before = + builder.createBlock(&whileOp.getBefore(), {}, types, {loc, loc, loc}); + builder.setInsertionPointToEnd(before); + Value cond = builder.create(loc, arith::CmpIPredicate::ult, + before->getArgument(0), + before->getArgument(1)); + builder.create(loc, cond, before->getArguments()); - // The if-stmt true branch: i++; swap(data[i], data[j]); yield i. + // The after-region of the WhileOp. 
+ Block *after = + builder.createBlock(&whileOp.getAfter(), {}, types, {loc, loc, loc}); + builder.setInsertionPointToEnd(after); + i = after->getArgument(0); + j = after->getArgument(1); + p = after->getArgument(2); + + auto [iresult, iCompareEq] = createScanLoop( + builder, module, func, args.slice(xStartIdx, dim), i, p, dim, 1); + i = iresult; + auto [jresult, jCompareEq] = createScanLoop( + builder, module, func, args.slice(xStartIdx, dim), j, p, dim, -1); + j = jresult; + + // If i < j: + cond = builder.create(loc, arith::CmpIPredicate::ult, i, j); + scf::IfOp ifOp = builder.create(loc, types, cond, /*else=*/true); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - Value i1 = - builder.create(loc, forOp.getRegionIterArgs().front(), c1); - SmallVector swapOperands{i1, j}; + SmallVector swapOperands{i, j}; swapOperands.append(args.begin() + xStartIdx, args.end()); - FlatSymbolRefAttr swapFunc = getMangledSortHelperFunc( - builder, func, TypeRange(), kMaySwapFuncNamePrefix, dim, swapOperands, - createMaySwapFunc); - builder.create(loc, swapFunc, TypeRange(), swapOperands); - builder.create(loc, i1); - - // The if-stmt false branch: yield i. + createSwap(builder, loc, swapOperands); + // If the pivot is moved, update p with the new pivot. 
+ Value icond = + builder.create(loc, arith::CmpIPredicate::eq, i, p); + scf::IfOp ifOpI = builder.create(loc, TypeRange{p.getType()}, + icond, /*else=*/true); + builder.setInsertionPointToStart(&ifOpI.getThenRegion().front()); + builder.create(loc, ValueRange{j}); + builder.setInsertionPointToStart(&ifOpI.getElseRegion().front()); + Value jcond = + builder.create(loc, arith::CmpIPredicate::eq, j, p); + scf::IfOp ifOpJ = builder.create(loc, TypeRange{p.getType()}, + jcond, /*else=*/true); + builder.setInsertionPointToStart(&ifOpJ.getThenRegion().front()); + builder.create(loc, ValueRange{i}); + builder.setInsertionPointToStart(&ifOpJ.getElseRegion().front()); + builder.create(loc, ValueRange{p}); + builder.setInsertionPointAfter(ifOpJ); + builder.create(loc, ifOpJ.getResults()); + builder.setInsertionPointAfter(ifOpI); + Value compareEqIJ = + builder.create(loc, iCompareEq, jCompareEq); + scf::IfOp ifOp2 = builder.create( + loc, TypeRange{i.getType(), j.getType()}, compareEqIJ, /*else=*/true); + builder.setInsertionPointToStart(&ifOp2.getThenRegion().front()); + Value i2 = builder.create(loc, i, c1); + Value j2 = builder.create(loc, j, c1); + builder.create(loc, ValueRange{i2, j2}); + builder.setInsertionPointToStart(&ifOp2.getElseRegion().front()); + builder.create(loc, ValueRange{i, j}); + builder.setInsertionPointAfter(ifOp2); + builder.create( + loc, + ValueRange{ifOp2.getResult(0), ifOp2.getResult(1), ifOpI.getResult(0)}); + + // False branch for if i < j: builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, forOp.getRegionIterArgs().front()); + builder.create(loc, ValueRange{i, j, p}); - // After the if-stmt, yield the updated i value to end the for-stmt body. + // Return for the whileOp. builder.setInsertionPointAfter(ifOp); - builder.create(loc, ifOp.getResult(0)); - - // After the for-stmt: i++; swap(data[i], data[him1]); return i. 
- builder.setInsertionPointAfter(forOp); - i1 = builder.create(loc, forOp.getResult(0), c1); - swapOperands[0] = i1; - swapOperands[1] = him1; - builder.create(loc, swapFunc, TypeRange(), swapOperands); - builder.create(loc, i1); + builder.create(loc, ifOp.getResults()); + + // Return for the function. + builder.setInsertionPointAfter(whileOp); + builder.create(loc, whileOp.getResult(2)); } /// Creates a function to perform quick sort on the value in the range of diff --git a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir index 114bfd874609f..f5634524f7e66 100644 --- a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir +++ b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir @@ -92,28 +92,14 @@ func.func @sparse_push_back_inbound(%arg0: memref, %arg1: memref // CHECK: return %[[C]] // CHECK: } -// CHECK-LABEL: func.func private @_sparse_may_swap_1_i8_f32_index( -// CHECK-SAME: %[[I:arg0]]: index, -// CHECK-SAME: %[[J:.*]]: index, -// CHECK-SAME: %[[X0:.*]]: memref, -// CHECK-SAME: %[[Y0:.*]]: memref, -// CHECK-SAME: %[[Y1:.*]]: memref) { -// CHECK: %[[C:.*]] = arith.cmpi ne, %[[I]], %[[J]] -// CHECK: scf.if %[[C]] { -// CHECK: %[[Vx0i:.*]] = memref.load %[[X0]]{{\[}}%[[I]]] -// CHECK: %[[Vx0j:.*]] = memref.load %[[X0]]{{\[}}%[[J]]] -// CHECK: memref.store %[[Vx0j]], %[[X0]]{{\[}}%[[I]]] -// CHECK: memref.store %[[Vx0i]], %[[X0]]{{\[}}%[[J]]] -// CHECK: %[[Vy0i:.*]] = memref.load %[[Y0]]{{\[}}%[[I]]] -// CHECK: %[[Vy0j:.*]] = memref.load %[[Y0]]{{\[}}%[[J]]] -// CHECK: memref.store %[[Vy0j]], %[[Y0]]{{\[}}%[[I]]] -// CHECK: memref.store %[[Vy0i]], %[[Y0]]{{\[}}%[[J]]] -// CHECK: %[[Vy1i:.*]] = memref.load %[[Y1]]{{\[}}%[[I]]] -// CHECK: %[[Vy1j:.*]] = memref.load %[[Y1]]{{\[}}%[[J]]] -// CHECK: memref.store %[[Vy1j]], %[[Y1]]{{\[}}%[[I]]] -// CHECK: memref.store %[[Vy1i]], %[[Y1]]{{\[}}%[[J]]] -// CHECK: } -// CHECK: return +// CHECK-LABEL: func.func private @_sparse_compare_eq_1_i8( +// CHECK-SAME: 
%[[I:arg0]]: index, +// CHECK-SAME: %[[J:.*]]: index, +// CHECK-SAME: %[[X0:.*]]: memref) -> i1 { +// CHECK: %[[VI:.*]] = memref.load %[[X0]]{{\[}}%[[I]]] +// CHECK: %[[VJ:.*]] = memref.load %[[X0]]{{\[}}%[[J]]] +// CHECK: %[[C:.*]] = arith.cmpi eq, %[[VI]], %[[VJ]] +// CHECK: return %[[C]] // CHECK: } // CHECK-LABEL: func.func private @_sparse_partition_1_i8_f32_index( @@ -123,22 +109,27 @@ func.func @sparse_push_back_inbound(%arg0: memref, %arg1: memref // CHECK-SAME: %[[Y0:.*]]: memref, // CHECK-SAME: %[[Y1:.*]]: memref) -> index { // CHECK: %[[C1:.*]] = arith.constant 1 -// CHECK: %[[I:.*]] = arith.subi %[[L]], %[[C1]] -// CHECK: %[[Hm1:.*]] = arith.subi %[[H]], %[[C1]] -// CHECK: %[[I3:.*]] = scf.for %[[J:.*]] = %[[L]] to %[[Hm1]] step %[[C1]] iter_args(%[[I2:.*]] = %[[I]]) -> (index) { -// CHECK: %[[COND:.*]] = func.call @_sparse_less_than_1_i8(%[[J]], %[[Hm1]], %[[X0]]) -// CHECK: %[[IF:.*]] = scf.if %[[COND]] -> (index) { -// CHECK: %[[Ip1:.*]] = arith.addi %[[I2]], %[[C1]] -// CHECK: func.call @_sparse_may_swap_1_i8_f32_index(%[[Ip1]], %[[J]], %[[X0]], %[[Y0]], %[[Y1]]) -// CHECK: scf.yield %[[Ip1]] +// CHECK: %[[VAL_6:.*]] = arith.constant - +// CHECK: %[[SUM:.*]] = arith.addi %[[L]], %[[H]] +// CHECK: %[[P:.*]] = arith.shrui %[[SUM]], %[[C1]] +// CHECK: %[[J:.*]] = arith.subi %[[H]], %[[C1]] +// CHECK: %[[W:.*]]:3 = scf.while (%[[Ib:.*]] = %[[L]], %[[Jb:.*]] = %[[J]], %[[pb:.*]] = %[[P]]) : (index, index, index) -> (index, index, index) { +// CHECK: %[[Cn:.*]] = arith.cmpi ult, %[[Ib]], %[[Jb]] +// CHECK: scf.condition(%[[Cn]]) %[[Ib]], %[[Jb]], %[[pb]] +// CHECK: } do { +// CHECK: ^bb0(%[[Ia:.*]]: index, %[[Ja:.*]]: index, %[[Pa:.*]]: index): +// CHECK: %[[I2:.*]] = scf.while +// CHECK: %[[Ieq:.*]] = func.call @_sparse_compare_eq_1_i8(%[[I2:.*]], %[[Pa]], %[[X0]]) +// CHECK: %[[J2:.*]] = scf.while +// CHECK: %[[Jeq:.*]] = func.call @_sparse_compare_eq_1_i8(%[[J2:.*]], %[[Pa]], %[[X0]]) +// CHECK: %[[Cn2:.*]] = arith.cmpi ult, %[[I2]], %[[J2]] +// CHECK: 
%[[If:.*]]:3 = scf.if %[[Cn2]] -> (index, index, index) { // CHECK: } else { -// CHECK: scf.yield %[[I2]] +// CHECK: scf.yield %[[I2]], %[[J2]], %[[Pa]] // CHECK: } -// CHECK: scf.yield %[[IF:.*]] +// CHECK: scf.yield %[[If:.*]]#0, %[[If]]#1, %[[If]]#2 // CHECK: } -// CHECK: %[[I3p1:.*]] = arith.addi %[[I3:.*]], %[[C1]] : index -// CHECK: call @_sparse_may_swap_1_i8_f32_index(%[[I3p1]], %[[Hm1]], %[[X0]], %[[Y0]], %[[Y1]]) -// CHECK: return %[[I3p1]] +// CHECK: return %[[W:.*]]#2 // CHECK: } // CHECK-LABEL: func.func private @_sparse_sort_nonstable_1_i8_f32_index( @@ -181,7 +172,7 @@ func.func @sparse_sort_1d2v(%arg0: index, %arg1: memref<10xi8>, %arg2: memref, %arg3: memref, %arg4: memref) -> i1 { -// CHECK-DAG: func.func private @_sparse_may_swap_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) { +// CHECK-DAG: func.func private @_sparse_compare_eq_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) -> i1 { // CHECK-DAG: func.func private @_sparse_partition_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) -> index { // CHECK-DAG: func.func private @_sparse_sort_nonstable_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) { // CHECK-LABEL: func.func @sparse_sort_3d diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir index 650c0885fcb66..f0937e238af58 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir @@ -82,7 +82,7 @@ module { // CHECK: ( 1, 1, 2, 5, 10 ) // CHECK: ( 3, 3, 1, 10, 1 ) // CHECK: ( 9, 9, 4, 7, 2 ) - // CHECK: ( 7, 8, 10, 9, 6 ) + // CHECK: ( 8, 7, 10, 9, 6 ) call @storeValuesTo(%x0, %c10, %c2, %c1, %c5, %c1) : (memref, i32, i32, i32, i32, i32) -> () call @storeValuesTo(%x1, %c1, %c1, %c3, %c10, %c3) From 
a8c24d57b81703b5730460d7cb12af9783a02539 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Wed, 12 Oct 2022 09:20:05 +0000 Subject: [PATCH 426/516] [InstCombine] Remove redundant splats in InstCombineVectorOps Splatting the first vector element of the result of a BinOp, where any of the BinOp's operands are the result of a first vector element splat can be simplified to splatting the first vector element of the result of the BinOp Differential Revision: https://reviews.llvm.org/D135876 --- .../InstCombine/InstCombineInternal.h | 1 + .../InstCombine/InstCombineVectorOps.cpp | 33 ++- .../Transforms/InstCombine/shuffle-binop.ll | 77 +++++-- .../AArch64/insert-shuffle-binop.ll | 216 ------------------ 4 files changed, 96 insertions(+), 231 deletions(-) delete mode 100644 llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 3f1bcea3727f5..11aed7754c264 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -167,6 +167,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); + Instruction *simplifyBinOpSplats(ShuffleVectorInst &SVI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); Instruction *visitLandingPadInst(LandingPadInst &LI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index d50918629ba5c..5964c96619a6c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2598,6 +2598,35 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst 
&Shuf) { return new ShuffleVectorInst(X, Y, NewMask); } +// Splatting the first element of the result of a BinOp, where any of the +// BinOp's operands are the result of a first element splat can be simplified to +// splatting the first element of the result of the BinOp +Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) { + if (!match(SVI.getOperand(1), m_Undef()) || + !match(SVI.getShuffleMask(), m_ZeroMask())) + return nullptr; + + Value *Op0 = SVI.getOperand(0); + Value *X, *Y; + if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()), + m_Value(Y))) && + !match(Op0, m_BinOp(m_Value(X), + m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask())))) + return nullptr; + if (X->getType() != Y->getType()) + return nullptr; + + auto *BinOp = cast(Op0); + if (!isSafeToSpeculativelyExecute(BinOp)) + return nullptr; + + Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y); + if (auto NewBOI = dyn_cast(NewBO)) + NewBOI->copyIRFlags(BinOp); + + return new ShuffleVectorInst(NewBO, SVI.getShuffleMask()); +} + Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -2606,7 +2635,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SVI.getType(), ShufQuery)) return replaceInstUsesWith(SVI, V); - // Bail out for scalable vectors + if (Instruction *I = simplifyBinOpSplats(SVI)) + return I; + if (isa(LHS->getType())) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/shuffle-binop.ll b/llvm/test/Transforms/InstCombine/shuffle-binop.ll index fe2d1af5a04f3..0be6a5bf9918c 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-binop.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-binop.ll @@ -50,13 +50,13 @@ define <4 x i8> @splat_binop_splat_x(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> 
zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = add <4 x i8> [[XSPLAT]], [[Y:%.*]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i8> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %xsplat) - %b = add <4 x i8> %xsplat, %y + %b = add nsw <4 x i8> %xsplat, %y %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } @@ -65,14 +65,14 @@ define <4 x i8> @splat_binop_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = sub <4 x i8> [[X:%.*]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i8> [[X:%.*]], [[Y]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %ysplat) %b = sub <4 x i8> %x, %ysplat - %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer + %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> ret <4 x i8> %bsplat } @@ -82,21 +82,40 @@ define <4 x i8> @splat_binop_splat_x_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: 
[[B:%.*]] = mul <4 x i8> [[XSPLAT]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i8> [[Y]], [[X]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %xsplat) %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %ysplat) - %b = mul <4 x i8> %xsplat, %ysplat + %b = mul nuw <4 x i8> %xsplat, %ysplat %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } -define @vscale_splat_binop_splat_x( %x, %y) { -; CHECK-LABEL: @vscale_splat_binop_splat_x( +define <4 x float> @splat_binop_splat_x_splat_y_fmath_flags(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: @splat_binop_splat_x_splat_y_fmath_flags( +; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: call void @use(<4 x float> [[XSPLAT]]) +; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: call void @use(<4 x float> [[YSPLAT]]) +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[Y]], [[X]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[BSPLAT]] +; + %xsplat = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> zeroinitializer + call void @use(<4 x float> %xsplat) + %ysplat = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> zeroinitializer + call void @use(<4 x float> %ysplat) + %b = fmul fast <4 x float> %xsplat, %ysplat + %bsplat = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %bsplat +} + +define 
@vscale_splat_udiv_splat_x( %x, %y) { +; CHECK-LABEL: @vscale_splat_udiv_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer ; CHECK-NEXT: [[B:%.*]] = udiv [[XSPLAT]], [[Y:%.*]] ; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer @@ -108,6 +127,19 @@ define @vscale_splat_binop_splat_x( %x, %bsplat } +define @vscale_splat_urem_splat_x( %x, %y) { +; CHECK-LABEL: @vscale_splat_urem_splat_x( +; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer +; CHECK-NEXT: [[B:%.*]] = urem [[XSPLAT]], [[Y:%.*]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer +; CHECK-NEXT: ret [[BSPLAT]] +; + %xsplat = shufflevector %x, poison, zeroinitializer + %b = urem %xsplat, %y + %bsplat = shufflevector %b, poison, zeroinitializer + ret %bsplat +} + define @vscale_splat_binop_splat_y( %x, %y) { ; CHECK-LABEL: @vscale_splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer @@ -140,8 +172,8 @@ define @vscale_splat_binop_splat_x_splat_y_calls( [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer ; CHECK-NEXT: call void @use_v( [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = lshr [[XSPLAT]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = lshr [[X]], [[Y]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer ; CHECK-NEXT: ret [[BSPLAT]] ; %xsplat = shufflevector %x, poison, zeroinitializer @@ -153,5 +185,22 @@ define @vscale_splat_binop_splat_x_splat_y_calls( %bsplat } +define <2 x double> @shuffle_op2_0th_element_mask(ptr %a, ptr %b) { + ;%0 = load <2 x double>, ptr @d, align 16 +; CHECK-LABEL: @shuffle_op2_0th_element_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[B:%.*]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x 
double> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x double> [[SHUFFLE]] +; + %1 = load <2 x double>, ptr %a, align 16 + %2 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> zeroinitializer + %3 = load <2 x double>, ptr %b, align 16 + %sub = fsub <2 x double> %3, %2 + %shuffle = shufflevector <2 x double> %sub, <2 x double> %sub, <2 x i32> + ret <2 x double> %shuffle +} + declare void @use(<4 x i8>) -declare void @use_v() \ No newline at end of file +declare void @use_v() diff --git a/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll b/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll deleted file mode 100644 index c75f53bc68583..0000000000000 --- a/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll +++ /dev/null @@ -1,216 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='vector-combine' -S %s | FileCheck %s - -target triple = "aarch64-none-eabi" - -define @fadd_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fadd fast %broadcast.splatinsert2, %broadcast.splat - %3 = 
shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fadd fast <4 x float> %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, 
poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fsub fast %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fsub fast <4 x float> %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fadd_vscale_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; 
CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fadd fast %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fadd fast <4 x float> %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: 
[[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fsub fast %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fadd_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer - %r = fadd fast %broadcast.splat, %broadcast.splat2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> 
poison, float %1, i64 0 - %broadcast.splat2 = shufflevector <4 x float> %broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer - %r = fadd fast <4 x float> %broadcast.splat, %broadcast.splat2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer - %r = fsub fast %broadcast.splat, %broadcast.splat2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: 
[[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %broadcast.splat2 = shufflevector <4 x float> %broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer - %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splat2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} From a9d7b18b4a853daa8fecb5d5863af211841de762 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 1 Nov 2022 17:11:47 +0000 Subject: [PATCH 427/516] [AArch64][SVE2] Add the SVE2.1 quadword variants of ld1w/ld1d/st1w/st1d This patch adds the assembly/disassembly for the following instructions: st1w: Contiguous store words from vector (128-bit vector elements) st1d: Contiguous store doublewords from vector (128-bit vector elements) ld1w: Contiguous load unsigned words to vector (128-bit vector elements) ld1d: Contiguous load unsigned doublewords to vector (128-bit vector elements) The reference can be found here: https://developer.arm.com/documentation/ddi0602/2022-09 Differential Revision: https://reviews.llvm.org/D137245 --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 24 ++++++ llvm/lib/Target/AArch64/SVEInstrFormats.td | 62 ++++++++++++++++ .../MC/AArch64/SVE2p1/ld1d_q-diagnostics.s | 32 ++++++++ llvm/test/MC/AArch64/SVE2p1/ld1d_q.s | 73 ++++++++++++++++++ .../MC/AArch64/SVE2p1/ld1w_q-diagnostics.s | 32 ++++++++ 
llvm/test/MC/AArch64/SVE2p1/ld1w_q.s | 62 ++++++++++++++++ .../MC/AArch64/SVE2p1/st1d_q-diagnostics.s | 33 +++++++++ llvm/test/MC/AArch64/SVE2p1/st1d_q.s | 74 +++++++++++++++++++ .../MC/AArch64/SVE2p1/st1w_q-diagnostics.s | 32 ++++++++ llvm/test/MC/AArch64/SVE2p1/st1w_q.s | 74 +++++++++++++++++++ 10 files changed, 498 insertions(+) create mode 100644 llvm/test/MC/AArch64/SVE2p1/ld1d_q-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/ld1d_q.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/ld1w_q-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/ld1w_q.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/st1d_q-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/st1d_q.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/st1w_q-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/st1w_q.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 2a60c32edc9d7..32bdf17a4c3e3 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -904,10 +904,16 @@ let Predicates = [HasSVEorSME] in { defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>; defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>; defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm LD1W_Q_IMM : sve_mem_128b_cld_si<0b10, "ld1w">; + } defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>; defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>; defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>; defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm LD1D_Q_IMM : sve_mem_128b_cld_si<0b11, "ld1d">; + } // LD1R loads (splat scalar to vector) defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>; @@ -965,10 +971,16 @@ let Predicates = [HasSVEorSME] in { defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, 
ZPR32, GPR64NoXZRshifted16>; defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>; defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + let Predicates = [HasSVE2p1] in { + defm LD1W_Q : sve_mem_128b_cld_ss<0b10, "ld1w", GPR64NoXZRshifted32>; + } defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>; defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + let Predicates = [HasSVE2p1] in { + defm LD1D_Q : sve_mem_128b_cld_ss<0b11, "ld1d", GPR64NoXZRshifted64>; + } } // End HasSVEorSME let Predicates = [HasSVE] in { @@ -1265,7 +1277,13 @@ let Predicates = [HasSVEorSME] in { defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>; defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>; defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm ST1W_Q_IMM : sve_mem_cst_si<0b10, 0b00, "st1w", Z_q, ZPR128>; + } defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm ST1D_Q_IMM : sve_mem_cst_si<0b11, 0b10, "st1d", Z_q, ZPR128>; + } // contiguous store with reg+reg addressing. 
defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>; @@ -1277,7 +1295,13 @@ let Predicates = [HasSVEorSME] in { defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>; defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + let Predicates = [HasSVE2p1] in { + defm ST1W_Q : sve_mem_cst_ss<0b1000, "st1w", Z_q, ZPR128, GPR64NoXZRshifted32>; + } defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + let Predicates = [HasSVE2p1] in { + defm ST1D_Q : sve_mem_cst_ss<0b1110, "st1d", Z_q, ZPR128, GPR64NoXZRshifted64>; + } } // End HasSVEorSME let Predicates = [HasSVE] in { diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index f91e5243fca4c..f9d6abdd52929 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -9221,3 +9221,65 @@ multiclass sve_mem_sst_128b_64_unscaled { def : InstAlias(NAME) Z_q:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>; } + + +// SVE contiguous load (quadwords, scalar plus immediate) +class sve_mem_128b_cld_si dtype, string mnemonic> + : I<(outs Z_q:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + mnemonic, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + bits<5> Zt; + bits<5> Rn; + bits<3> Pg; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = dtype; + let Inst{22-20} = 0b001; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_128b_cld_si dtype, string mnemonic> { + def NAME : sve_mem_128b_cld_si; + + def : InstAlias(NAME) Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; + def : InstAlias(NAME) ZPR128:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME) ZPR128:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; +} + + +// SVE 
contiguous load (quadwords, scalar plus scalar) +class sve_mem_128b_cld_ss dtype, string mnemonic, RegisterOperand gprsh_ty> + : I<(outs Z_q:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprsh_ty:$Rm), + mnemonic, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", + []>, Sched<[]> { + bits<5> Zt; + bits<5> Rn; + bits<3> Pg; + bits<5> Rm; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = dtype; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_128b_cld_ss dtype, string mnemonic, RegisterOperand gprsh_ty> { + def NAME : sve_mem_128b_cld_ss; + + def : InstAlias(NAME) ZPR128:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprsh_ty:$Rm), 0>; +} diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ld1d_q-diagnostics.s new file mode 100644 index 0000000000000..7860d9a610d01 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d_q-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +ld1d {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ld1d {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1d {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ld1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Invalid immediate range + +ld1d {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: ld1d {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: ld1d {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d_q.s b/llvm/test/MC/AArch64/SVE2p1/ld1d_q.s new file mode 100644 index 0000000000000..12ecde8dc80f8 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d_q.s @@ -0,0 +1,73 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1d {z0.q}, p0/z, [x0, x0, lsl #3] // 10100101-10000000-10000000-00000000 +// CHECK-INST: ld1d { z0.q }, p0/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x80,0x80,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5808000 + +ld1d {z21.q}, p5/z, [x10, x21, lsl #3] // 10100101-10010101-10010101-01010101 +// CHECK-INST: ld1d { z21.q }, p5/z, [x10, x21, lsl #3] +// 
CHECK-ENCODING: [0x55,0x95,0x95,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5959555 + +ld1d {z23.q}, p3/z, [x13, x8, lsl #3] // 10100101-10001000-10001101-10110111 +// CHECK-INST: ld1d { z23.q }, p3/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x8d,0x88,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5888db7 + +ld1d z23.q, p3/z, [x13, x8, lsl #3] // 10100101-10001000-10001101-10110111 +// CHECK-INST: ld1d { z23.q }, p3/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x8d,0x88,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5888db7 + +ld1d {z0.q}, p0/z, [x0] // 10100101-10010000-00100000-00000000 +// CHECK-INST: ld1d { z0.q }, p0/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x90,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5902000 + +ld1d z0.q, p0/z, [x0] // 10100101-10010000-00100000-00000000 +// CHECK-INST: ld1d { z0.q }, p0/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x90,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5902000 + +ld1d {z21.q}, p5/z, [x10, #5, mul vl] // 10100101-10010101-00110101-01010101 +// CHECK-INST: ld1d { z21.q }, p5/z, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x95,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5953555 + +ld1d {z23.q}, p3/z, [x13, #-8, mul vl] // 10100101-10011000-00101101-10110111 +// CHECK-INST: ld1d { z23.q }, p3/z, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x98,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5982db7 + +ld1d {z31.q}, p7/z, [sp, #-1, mul vl] // 10100101-10011111-00111111-11111111 +// CHECK-INST: ld1d { z31.q }, p7/z, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x9f,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a59f3fff + +ld1d z31.q, p7/z, [sp, #-1, mul vl] // 10100101-10011111-00111111-11111111 +// CHECK-INST: ld1d { z31.q }, p7/z, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x9f,0xa5] +// 
CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a59f3fff diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ld1w_q-diagnostics.s new file mode 100644 index 0000000000000..f049add93eb75 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1w_q-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +ld1w {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ld1w {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ld1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ld1w {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: ld1w {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. 
+// CHECK-NEXT: ld1w {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w_q.s b/llvm/test/MC/AArch64/SVE2p1/ld1w_q.s new file mode 100644 index 0000000000000..9450ac5b0fe92 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1w_q.s @@ -0,0 +1,62 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1w {z0.q}, p0/z, [x0, x0, lsl #2] // 10100101-00000000-10000000-00000000 +// CHECK-INST: ld1w { z0.q }, p0/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x80,0x00,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5008000 + +ld1w {z21.q}, p5/z, [x10, x21, lsl #2] // 10100101-00010101-10010101-01010101 +// CHECK-INST: ld1w { z21.q }, p5/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x95,0x15,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5159555 + +ld1w {z23.q}, p3/z, [x13, x8, lsl #2] // 10100101-00001000-10001101-10110111 +// CHECK-INST: ld1w { z23.q }, p3/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x8d,0x08,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5088db7 + +ld1w z23.q, p3/z, [x13, x8, lsl #2] // 10100101-00001000-10001101-10110111 +// CHECK-INST: 
ld1w { z23.q }, p3/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x8d,0x08,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5088db7 + +ld1w {z0.q}, p0/z, [x0] // 10100101-00010000-00100000-00000000 +// CHECK-INST: ld1w { z0.q }, p0/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x10,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5102000 + +ld1w {z21.q}, p5/z, [x10, #5, mul vl] // 10100101-00010101-00110101-01010101 +// CHECK-INST: ld1w { z21.q }, p5/z, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x15,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5153555 + +ld1w {z23.q}, p3/z, [x13, #-8, mul vl] // 10100101-00011000-00101101-10110111 +// CHECK-INST: ld1w { z23.q }, p3/z, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x18,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5182db7 + +ld1w {z31.q}, p7/z, [sp, #-1, mul vl] // 10100101-00011111-00111111-11111111 +// CHECK-INST: ld1w { z31.q }, p7/z, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x1f,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a51f3fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1d_q-diagnostics.s new file mode 100644 index 0000000000000..4ad52197095dc --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1d_q-diagnostics.s @@ -0,0 +1,33 @@ +-26 +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +st1d {z0.q}, p8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1d {z0.q}, p8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: st1d {z23.q}, 
p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1d {z0.q}, p0, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: st1d {z0.q}, p0, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z3.q}, p0, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: st1d {z3.q}, p0, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d_q.s b/llvm/test/MC/AArch64/SVE2p1/st1d_q.s new file mode 100644 index 0000000000000..52d1f1635bf34 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1d_q.s @@ -0,0 +1,74 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1d {z0.q}, p0, [x0, x0, lsl #3] // 11100101-11000000-01000000-00000000 +// 
CHECK-INST: st1d { z0.q }, p0, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x40,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c04000 + +st1d {z21.q}, p5, [x10, x21, lsl #3] // 11100101-11010101-01010101-01010101 +// CHECK-INST: st1d { z21.q }, p5, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0x55,0xd5,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5d55555 + +st1d {z23.q}, p3, [x13, x8, lsl #3] // 11100101-11001000-01001101-10110111 +// CHECK-INST: st1d { z23.q }, p3, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x4d,0xc8,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c84db7 + +st1d z23.q, p3, [x13, x8, lsl #3] // 11100101-11001000-01001101-10110111 +// CHECK-INST: st1d { z23.q }, p3, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x4d,0xc8,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c84db7 + +st1d {z0.q}, p0, [x0] // 11100101-11000000-11100000-00000000 +// CHECK-INST: st1d { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c0e000 + +st1d z0.q, p0, [x0] // 11100101-11000000-11100000-00000000 +// CHECK-INST: st1d { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c0e000 + +st1d {z21.q}, p5, [x10, #5, mul vl] // 11100101-11000101-11110101-01010101 +// CHECK-INST: st1d { z21.q }, p5, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0xf5,0xc5,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c5f555 + +st1d {z23.q}, p3, [x13, #-8, mul vl] // 11100101-11001000-11101101-10110111 +// CHECK-INST: st1d { z23.q }, p3, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0xed,0xc8,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c8edb7 + +st1d {z31.q}, p7, [sp, #-1, mul vl] // 11100101-11001111-11111111-11111111 +// CHECK-INST: st1d { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: 
[0xff,0xff,0xcf,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5cfffff + +st1d z31.q, p7, [sp, #-1, mul vl] // 11100101-11001111-11111111-11111111 +// CHECK-INST: st1d { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0xff,0xcf,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5cfffff + diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1w_q-diagnostics.s new file mode 100644 index 0000000000000..d337e62666360 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1w_q-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +st1w {z0.q}, p8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1w {z0.q}, p8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: st1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1w {z0.q}, p0, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: st1w {z0.q}, p0, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z3.q}, p0, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. 
+// CHECK-NEXT: st1w {z3.q}, p0, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w_q.s b/llvm/test/MC/AArch64/SVE2p1/st1w_q.s new file mode 100644 index 0000000000000..efb682692224e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1w_q.s @@ -0,0 +1,74 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1w {z0.q}, p0, [x0, x0, lsl #2] // 11100101-00000000-01000000-00000000 +// CHECK-INST: st1w { z0.q }, p0, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x40,0x00,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5004000 + +st1w {z21.q}, p5, [x10, x21, lsl #2] // 11100101-00010101-01010101-01010101 +// CHECK-INST: st1w { z21.q }, p5, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x55,0x15,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5155555 + +st1w {z23.q}, p3, [x13, x8, lsl #2] // 11100101-00001000-01001101-10110111 +// CHECK-INST: st1w { z23.q }, p3, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x08,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5084db7 + +st1w z23.q, p3, [x13, x8, lsl #2] // 11100101-00001000-01001101-10110111 +// CHECK-INST: st1w { z23.q }, 
p3, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x08,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5084db7 + +st1w {z0.q}, p0, [x0] // 11100101-00000000-11100000-00000000 +// CHECK-INST: st1w { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x00,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e500e000 + +st1w z0.q, p0, [x0] // 11100101-00000000-11100000-00000000 +// CHECK-INST: st1w { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x00,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e500e000 + +st1w {z21.q}, p5, [x10, #5, mul vl] // 11100101-00000101-11110101-01010101 +// CHECK-INST: st1w { z21.q }, p5, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0xf5,0x05,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e505f555 + +st1w {z23.q}, p3, [x13, #-8, mul vl] // 11100101-00001000-11101101-10110111 +// CHECK-INST: st1w { z23.q }, p3, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0xed,0x08,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e508edb7 + +st1w {z31.q}, p7, [sp, #-1, mul vl] // 11100101-00001111-11111111-11111111 +// CHECK-INST: st1w { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0xff,0x0f,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e50fffff + +st1w z31.q, p7, [sp, #-1, mul vl] // 11100101-00001111-11111111-11111111 +// CHECK-INST: st1w { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0xff,0x0f,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e50fffff + From 36e8e19337b9a6f47c7f17c849164df47d45fa65 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Thu, 3 Nov 2022 22:07:58 -0700 Subject: [PATCH 428/516] [NFC][BlockPlacement]Add an option to renumber blocks based on function layout order. Use case: - When block layout is visualized after MBP pass, the basic blocks are labeled in layout order; meanwhile blocks could be numbered in a different order. 
- As a result, it's hard to map between the graph and pass output. With this option on, the basic blocks are renumbered in function layout order. This option is only useful when a function is to be visualized (i.e., when view options are on), so it is intended for debugging only. Use https://godbolt.org/z/5WTW36bMr as an example: - As MBP pass output (shown in godbolt output window), `func2` is in a basic block numbered `2` (`bb.2`), and `func1` is in a basic block numbered `3` (`bb.3`); `bb.3` is a block with higher block frequency than `bb.2`, and `bb.3` is placed before `bb.2` in the function layout. - Use [1] to get the dot graph (graph uploaded in [2]), the blocks are re-numbered. - `func1` is in 'if.end' block, and labeled `1` in visualized dot; `func2` is in 'if.then' blocks, and labeled `3` --> the labeled number and bb number won't map. - [[ https://github.com/llvm/llvm-project/blob/b5626ae9751f0d82aa04791a21689b289721738e/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp#L127 | DOTGraphTraits::getNodeLabel ]] is where labeled numbers are based on function layout number, and [[ https://github.com/llvm/llvm-project/blob/a8d93783f37c042ace67069ae4ca6f8fd849c2d0/llvm/include/llvm/Support/GraphWriter.h#L209 | called by graph writer ]]. So calling 'MachineFunction::RenumberBlocks' makes the labeled number (in dot graph) and the block number (in pass output) consistent with each other. 
[1] `./bin/clang++ -O3 -S -mllvm -view-block-layout-with-bfi=count -mllvm -view-bfi-func-name=_Z9func_loopv -mllvm -print-after=block-placement -mllvm -filter-print-funcs=_Z9func_loopv test.c` [2] {F25201785} Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D137467 --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 4884ac9417204..7bbc347a8cf88 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -201,6 +201,18 @@ static cl::opt TriangleChainCount( cl::init(2), cl::Hidden); +// Use case: When block layout is visualized after MBP pass, the basic blocks +// are labeled in layout order; meanwhile blocks could be numbered in a +// different order. It's hard to map between the graph and pass output. +// With this option on, the basic blocks are renumbered in function layout +// order. For debugging only. +static cl::opt RenumberBlocksBeforeView( + "renumber-blocks-before-view", + cl::desc( + "If true, basic blocks are re-numbered before MBP layout is printed " + "into a dot graph. Only used when a function is being printed."), + cl::init(false), cl::Hidden); + extern cl::opt EnableExtTspBlockPlacement; extern cl::opt ApplyExtTspWithoutProfile; @@ -3466,6 +3478,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || F->getFunction().getName().equals(ViewBlockFreqFuncName))) { + if (RenumberBlocksBeforeView) + MF.RenumberBlocks(); MBFI->view("MBP." 
+ MF.getName(), false); } From a50c269c7372f5f0373fe3876ed8f8acf0e2f12d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 7 Nov 2022 17:02:19 +0100 Subject: [PATCH 429/516] [InstCombine] Handle load smaller than one byte in memset forward APInt::getSplat() requires that the new size is >= the original one. If we're loading less than 8 bits, truncate instead. Fixes https://github.com/llvm/llvm-project/issues/58845. --- llvm/lib/Analysis/Loads.cpp | 12 +++++++---- .../InstCombine/load-store-forward.ll | 20 +++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 93faefa947a3e..bc16c00c53206 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -532,13 +532,17 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr, if (IsLoadCSE) *IsLoadCSE = false; + TypeSize LoadTypeSize = DL.getTypeSizeInBits(AccessTy); + if (LoadTypeSize.isScalable()) + return nullptr; + // Make sure the read bytes are contained in the memset. - TypeSize LoadSize = DL.getTypeSizeInBits(AccessTy); - if (LoadSize.isScalable() || - (Len->getValue() * 8).ult(LoadSize.getFixedSize())) + uint64_t LoadSize = LoadTypeSize.getFixedSize(); + if ((Len->getValue() * 8).ult(LoadSize)) return nullptr; - APInt Splat = APInt::getSplat(LoadSize.getFixedSize(), Val->getValue()); + APInt Splat = LoadSize >= 8 ? 
APInt::getSplat(LoadSize, Val->getValue()) + : Val->getValue().trunc(LoadSize); ConstantInt *SplatC = ConstantInt::get(MSI->getContext(), Splat); if (CastInst::isBitOrNoopPointerCastable(SplatC->getType(), AccessTy, DL)) return SplatC; diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index 5a847cd68db84..6be5f6ed42d53 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -284,6 +284,16 @@ define i27 @load_after_memset_0_non_byte_sized(ptr %a) { ret i27 %v } +define i1 @load_after_memset_0_i1(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_i1( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret i1 false +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i1, ptr %a + ret i1 %v +} + define <4 x i8> @load_after_memset_0_vec(ptr %a) { ; CHECK-LABEL: @load_after_memset_0_vec( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) @@ -324,6 +334,16 @@ define i27 @load_after_memset_1_non_byte_sized(ptr %a) { ret i27 %v } +define i1 @load_after_memset_1_i1(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_i1( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: ret i1 true +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load i1, ptr %a + ret i1 %v +} + define <4 x i8> @load_after_memset_1_vec(ptr %a) { ; CHECK-LABEL: @load_after_memset_1_vec( ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) From d35fcf0e97e7bb02381506a71e61ec282b292c50 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 7 Nov 2022 15:50:05 +0100 Subject: [PATCH 430/516] 
[WebAssembly] Use default attributes for intrinsics This switches wasm intrinsics to use default attributes, i.e. nofree, nosync, nocallback and willreturn. Especially willreturn will be required to avoid optimization regressions in the future. The attributes are omitted from the trapping fptoi intrinsics (where I assume trapping is considered well-defined, and as such these aren't willreturn), the throw/rethrow intrinsics (which will unwind) and the atomic intrinsics (which aren't nosync). Differential Revision: https://reviews.llvm.org/D137551 --- llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 335 +++++++++--------- .../CodeGen/WebAssembly/lower-wasm-ehsjlj.ll | 2 +- 2 files changed, 173 insertions(+), 164 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 3b28f958020ce..b8750abca2050 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -19,64 +19,69 @@ let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.". // Query the current memory size, and increase the current memory size. // Note that memory.size is not IntrNoMem because it must be sequenced with // respect to memory.grow calls. 
-def int_wasm_memory_size : Intrinsic<[llvm_anyint_ty], - [llvm_i32_ty], - [IntrReadMem]>; -def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty], - [llvm_i32_ty, LLVMMatchType<0>], - []>; +def int_wasm_memory_size : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrReadMem]>; +def int_wasm_memory_grow : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_i32_ty, LLVMMatchType<0>], []>; //===----------------------------------------------------------------------===// // ref.null intrinsics //===----------------------------------------------------------------------===// -def int_wasm_ref_null_extern : Intrinsic<[llvm_externref_ty], [], [IntrNoMem]>; -def int_wasm_ref_null_func : Intrinsic<[llvm_funcref_ty], [], [IntrNoMem]>; -def int_wasm_ref_is_null_extern : Intrinsic<[llvm_i32_ty], [llvm_externref_ty], - [IntrNoMem], "llvm.wasm.ref.is_null.extern">; -def int_wasm_ref_is_null_func : Intrinsic<[llvm_i32_ty], [llvm_funcref_ty], - [IntrNoMem], "llvm.wasm.ref.is_null.func">; +def int_wasm_ref_null_extern : + DefaultAttrsIntrinsic<[llvm_externref_ty], [], [IntrNoMem]>; +def int_wasm_ref_null_func : + DefaultAttrsIntrinsic<[llvm_funcref_ty], [], [IntrNoMem]>; +def int_wasm_ref_is_null_extern : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_externref_ty], [IntrNoMem], + "llvm.wasm.ref.is_null.extern">; +def int_wasm_ref_is_null_func : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_funcref_ty], + [IntrNoMem], "llvm.wasm.ref.is_null.func">; //===----------------------------------------------------------------------===// // Table intrinsics //===----------------------------------------------------------------------===// -def int_wasm_table_set_externref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_externref_ty], - [IntrWriteMem]>; -def int_wasm_table_set_funcref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty], - [IntrWriteMem]>; - -def int_wasm_table_get_externref : Intrinsic<[llvm_externref_ty], - [llvm_table_ty, llvm_i32_ty], - [IntrReadMem]>; 
-def int_wasm_table_get_funcref : Intrinsic<[llvm_funcref_ty], - [llvm_table_ty, llvm_i32_ty], - [IntrReadMem]>; +def int_wasm_table_set_externref : + DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_externref_ty], + [IntrWriteMem]>; +def int_wasm_table_set_funcref : + DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty], + [IntrWriteMem]>; + +def int_wasm_table_get_externref : + DefaultAttrsIntrinsic<[llvm_externref_ty], [llvm_table_ty, llvm_i32_ty], + [IntrReadMem]>; +def int_wasm_table_get_funcref : + DefaultAttrsIntrinsic<[llvm_funcref_ty], [llvm_table_ty, llvm_i32_ty], + [IntrReadMem]>; // Query the current table size, and increase the current table size. -def int_wasm_table_size : Intrinsic<[llvm_i32_ty], - [llvm_table_ty], - [IntrReadMem]>; -def int_wasm_table_copy : Intrinsic<[], - [llvm_table_ty, llvm_table_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_wasm_table_grow_externref : Intrinsic<[llvm_i32_ty], - [llvm_table_ty, llvm_externref_ty, llvm_i32_ty], - []>; -def int_wasm_table_grow_funcref : Intrinsic<[llvm_i32_ty], - [llvm_table_ty, llvm_funcref_ty, llvm_i32_ty], - []>; -def int_wasm_table_fill_externref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_externref_ty, llvm_i32_ty], - []>; -def int_wasm_table_fill_funcref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty, llvm_i32_ty], - []>; +def int_wasm_table_size : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_table_ty], [IntrReadMem]>; +def int_wasm_table_copy : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_table_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], []>; +def int_wasm_table_grow_externref : + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_table_ty, llvm_externref_ty, llvm_i32_ty], []>; +def int_wasm_table_grow_funcref : + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_table_ty, llvm_funcref_ty, llvm_i32_ty], []>; +def int_wasm_table_fill_externref : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_i32_ty, llvm_externref_ty, + 
llvm_i32_ty], []>; +def int_wasm_table_fill_funcref : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty, + llvm_i32_ty], []>; //===----------------------------------------------------------------------===// // Trapping float-to-int conversions //===----------------------------------------------------------------------===// +// These don't use default attributes, because they are not willreturn. def int_wasm_trunc_signed : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; @@ -88,12 +93,12 @@ def int_wasm_trunc_unsigned : Intrinsic<[llvm_anyint_ty], // Saturating float-to-int conversions //===----------------------------------------------------------------------===// -def int_wasm_trunc_saturate_signed : Intrinsic<[llvm_anyint_ty], - [llvm_anyfloat_ty], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty], - [llvm_anyfloat_ty], - [IntrNoMem, IntrSpeculatable]>; +def int_wasm_trunc_saturate_signed : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_trunc_saturate_unsigned : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// // Exception handling intrinsics @@ -108,32 +113,35 @@ def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>; // Since wasm does not use landingpad instructions, these instructions return // exception pointer and selector values until we lower them in WasmEHPrepare. 
-def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], - [IntrHasSideEffects]>; -def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty], - [IntrHasSideEffects]>; +def int_wasm_get_exception : + DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_token_ty], [IntrHasSideEffects]>; +def int_wasm_get_ehselector : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrHasSideEffects]>; // wasm.catch returns the pointer to the exception object caught by wasm 'catch' // instruction. This returns a single pointer, which is the case for C++ // exceptions. The immediate argument is an index to for a tag, which is 0 for // C++ exceptions. -def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], - [IntrHasSideEffects, ImmArg>]>; +def int_wasm_catch : + DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_i32_ty], + [IntrHasSideEffects, ImmArg>]>; // WebAssembly EH must maintain the landingpads in the order assigned to them // by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is // used in order to give them the indices in WasmEHPrepare. -def int_wasm_landingpad_index: Intrinsic<[], [llvm_token_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; +def int_wasm_landingpad_index : + DefaultAttrsIntrinsic<[], [llvm_token_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; // Returns LSDA address of the current function. -def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_wasm_lsda : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; //===----------------------------------------------------------------------===// // Atomic intrinsics //===----------------------------------------------------------------------===// // wait / notify +// These don't use default attributes, because they are not nosync. 
def int_wasm_memory_atomic_wait32 : Intrinsic<[llvm_i32_ty], [LLVMPointerType, llvm_i32_ty, llvm_i64_ty], @@ -157,152 +165,153 @@ def int_wasm_memory_atomic_notify: //===----------------------------------------------------------------------===// def int_wasm_swizzle : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_shuffle : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_sub_sat_signed : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_sub_sat_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_avgr_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_bitselect : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_anytrue : - Intrinsic<[llvm_i32_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_alltrue : - Intrinsic<[llvm_i32_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_bitmask : - Intrinsic<[llvm_i32_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_dot : - Intrinsic<[llvm_v4i32_ty], - [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_narrow_signed : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_narrow_unsigned : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_q15mulr_sat_signed : - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_pmin : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + 
[IntrNoMem, IntrSpeculatable]>; def int_wasm_pmax : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_extadd_pairwise_signed : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_extadd_pairwise_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// // Relaxed SIMD intrinsics (experimental) //===----------------------------------------------------------------------===// def int_wasm_fma : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_fms : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_laneselect : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_swizzle : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem, IntrSpeculatable]>; + 
DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_min : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_max : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_signed: - Intrinsic<[llvm_v4i32_ty], - [llvm_v4f32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_unsigned: - Intrinsic<[llvm_v4i32_ty], - [llvm_v4f32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_signed_zero: - Intrinsic<[llvm_v4i32_ty], - [llvm_v2f64_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_unsigned_zero: - Intrinsic<[llvm_v4i32_ty], - [llvm_v2f64_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_q15mulr_signed: - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_dot_i8x16_i7x16_signed: - Intrinsic<[llvm_v8i16_ty], - [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_dot_i8x16_i7x16_add_signed: - 
Intrinsic<[llvm_v4i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_dot_bf16x8_add_f32: - Intrinsic<[llvm_v4f32_ty], - [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4f32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// @@ -310,18 +319,18 @@ def int_wasm_relaxed_dot_bf16x8_add_f32: //===----------------------------------------------------------------------===// def int_wasm_tls_size : - Intrinsic<[llvm_anyint_ty], - [], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_tls_align : - Intrinsic<[llvm_anyint_ty], - [], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_tls_base : - Intrinsic<[llvm_ptr_ty], - [], - [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_ptr_ty], + [], + [IntrReadMem]>; } // TargetPrefix = "wasm" diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll index 446c298865ade..beb1b6d7ec8d5 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll @@ -109,7 +109,7 @@ catch: ; preds = %catch.start catchret from %2 to label %catchret.dest ; CHECK: catch: ; preds = %catch.start ; CHECK-NEXT: %exn = load i8*, i8** %exn.slot15, align 4 -; CHECK-NEXT: %5 = call i8* @__cxa_begin_catch(i8* %exn) #2 [ "funclet"(token %2) ] +; CHECK-NEXT: %5 = call i8* @__cxa_begin_catch(i8* %exn) #7 [ "funclet"(token %2) ] ; CHECK-NEXT: invoke void @__cxa_end_catch() [ "funclet"(token %2) ] ; CHECK-NEXT: to label %.noexc unwind label 
%catch.dispatch.longjmp From cf24d49dc81b06e8efff15bd77f332840180867c Mon Sep 17 00:00:00 2001 From: bixia1 Date: Mon, 7 Nov 2022 08:18:53 -0800 Subject: [PATCH 431/516] [mlir][sparse] Add sparse_tensor.sort_coo operator. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D137442 --- .../SparseTensor/IR/SparseTensorOps.td | 39 +++++++++++++++++++ .../SparseTensor/IR/SparseTensorDialect.cpp | 36 +++++++++++++++++ mlir/test/Dialect/SparseTensor/invalid.mlir | 26 +++++++++++++ mlir/test/Dialect/SparseTensor/roundtrip.mlir | 15 +++++++ 4 files changed, 116 insertions(+) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index a22dcce4298ef..52a6aff752792 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -518,6 +518,45 @@ def SparseTensor_SortOp : SparseTensor_Op<"sort", [AttrSizedOperandSegments]>, let hasVerifier = 1; } +def SparseTensor_SortCooOp : SparseTensor_Op<"sort_coo">, + Arguments<(ins Index:$n, StridedMemRefRankOf<[AnyInteger, Index], [1]>:$xy, + Variadic>:$ys, + OptionalAttr:$nx, OptionalAttr:$ny, + UnitAttr:$stable)> { + let summary = "Sorts the arrays in xs and ys lexicographically on the " + "integral values found in the xs list"; + let description = [{ + Sparse_tensor.sort_coo is similar to sparse_tensor.sort, except that all the + `xs` values and some `ys` values are put in the linear buffer `xy`. The + optional index attribute `nx` provides the number of `xs` values in `xy`. + When `nx` is not explicitly specified, its value is 1. The optional index + attribute `ny` provides the number of `ys` values in `xy`. When `ny` is not + explicitly specified, its value is 0. This instruction supports the TACO + COO style storage format for better sorting performance. 
+ + The buffer xy should have a dimension not less than n * (nx + ny) while the + buffers in `ys` should have a dimension not less than `n`. The behavior of + the operator is undefined if this condition is not met. + + Example: + + ```mlir + sparse_tensor.sort_coo %n, %x { nx = 2 : index} + : memref + ``` + + ```mlir + sparse_tensor.sort_coo %n, %xy jointly %y1 { nx = 2 : index, ny = 2 : index} + : memref jointly memref + ``` + }]; + + let assemblyFormat = "(`stable` $stable^)? $n" + "`,`$xy (`jointly` $ys^)? attr-dict" + "`:` type($xy) (`jointly` type($ys)^)?"; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // Sparse Tensor Syntax Operations. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 9d8cf37befd49..693af03a94cb5 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -719,6 +719,42 @@ LogicalResult SortOp::verify() { return success(); } +LogicalResult SortCooOp::verify() { + auto cn = getN().getDefiningOp(); + // We can't check the size of the buffers when n or buffer dimensions aren't + // compile-time constants. 
+ if (!cn) + return success(); + + uint64_t n = cn.value(); + uint64_t nx = 1; + if (auto nxAttr = getNxAttr()) { + nx = nxAttr.getInt(); + if (nx < 1) + emitError(llvm::formatv("Expected nx > 1, got {0}", nx)); + } + uint64_t ny = 0; + if (auto nyAttr = getNyAttr()) { + ny = nyAttr.getInt(); + } + + auto checkDim = [&](Value v, uint64_t min, const char *message) { + MemRefType tp = v.getType().cast(); + int64_t dim = tp.getShape()[0]; + if (dim != ShapedType::kDynamicSize && dim < min) { + emitError(llvm::formatv("{0} got {1} < {2}", message, dim, min)); + } + }; + + checkDim(getXy(), n * (nx + ny), "Expected dimension(xy) >= n * (nx + ny)"); + + for (Value opnd : getYs()) { + checkDim(opnd, n, "Expected dimension(y) >= n"); + } + + return success(); +} + LogicalResult YieldOp::verify() { // Check for compatible parent. auto *parentOp = (*this)->getParentOp(); diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 407f19401b86b..02fb97bc866c6 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -622,6 +622,32 @@ func.func @sparse_sort_mismatch_x_type(%arg0: index, %arg1: memref<10xindex>, %a // ----- +func.func @sparse_sort_coo_x_type( %arg0: index, %arg1: memref) { + // expected-error@+1 {{operand #1 must be 1D memref of integer or index values}} + sparse_tensor.sort_coo %arg0, %arg1: memref + return +} + +// ----- + +func.func @sparse_sort_coo_x_too_small(%arg0: memref<50xindex>) { + %i20 = arith.constant 20 : index + // expected-error@+1 {{Expected dimension(xy) >= n * (nx + ny) got 50 < 60}} + sparse_tensor.sort_coo %i20, %arg0 {nx = 2 : index, ny = 1 : index} : memref<50xindex> + return +} + +// ----- + +func.func @sparse_sort_coo_y_too_small(%arg0: memref<60xindex>, %arg1: memref<10xf32>) { + %i20 = arith.constant 20 : index + // expected-error@+1 {{Expected dimension(y) >= n got 10 < 20}} + sparse_tensor.sort_coo %i20, %arg0 jointly %arg1 {nx = 2 : 
index, ny = 1 : index} : memref<60xindex> jointly memref<10xf32> + return +} + +// ----- + #CSR = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}> func.func @sparse_alloc_escapes(%arg0: index) -> tensor<10x?xf64, #CSR> { diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index 7f850ccbbc4e2..bc664ae3d2d00 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -484,3 +484,18 @@ func.func @sparse_sort_stable(%arg0: index, %arg1: memref<10xi8>, %arg2: memref< sparse_tensor.sort stable %arg0, %arg1, %arg2 jointly %arg3 : memref<10xi8>, memref<20xi8> jointly memref<10xf64> return %arg1, %arg2, %arg3 : memref<10xi8>, memref<20xi8>, memref<10xf64> } + +// ----- + +func.func @sparse_sort_coo(%arg0: index, %arg1: memref) -> (memref) { + sparse_tensor.sort_coo %arg0, %arg1 { nx=2 : index, ny=1 : index}: memref + return %arg1 : memref +} + +// ----- + +func.func @sparse_sort_coo_stable(%arg0: index, %arg1: memref, %arg2: memref) -> (memref, memref) { + sparse_tensor.sort_coo stable %arg0, %arg1 jointly %arg2 { nx=2 : index, ny=1 : index}: memref jointly memref + return %arg1, %arg2 : memref, memref +} + From ecd0b5a5327a801377327f1f376219ba128b1159 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 7 Nov 2022 08:33:41 -0800 Subject: [PATCH 432/516] Revert "[SLP]Redesign vectorization of the gather nodes." 
This reverts commit 8ddd1ccdf89317be1c40fa9183e214878a56151e to fix buildbots failures reported in https://lab.llvm.org/buildbot#builders/74/builds/14839 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 336 +++++++----------- .../SLPVectorizer/AArch64/matmul.ll | 66 ++-- .../SLPVectorizer/AArch64/slp-fma-loss.ll | 140 ++++---- .../SLPVectorizer/AArch64/splat-loads.ll | 64 ++-- .../SLPVectorizer/AArch64/tsc-s116.ll | 29 +- .../vectorizable-selects-uniform-cmps.ll | 16 +- .../vectorize-free-extracts-inserts.ll | 46 +-- .../SLPVectorizer/AMDGPU/packed-math.ll | 8 +- .../Transforms/SLPVectorizer/X86/PR35777.ll | 22 +- .../Transforms/SLPVectorizer/X86/PR39774.ll | 18 +- .../X86/alternate-cmp-swapped-pred.ll | 4 +- .../SLPVectorizer/X86/broadcast_long.ll | 5 +- .../SLPVectorizer/X86/buildvector-shuffle.ll | 6 +- .../Transforms/SLPVectorizer/X86/c-ray.ll | 20 +- .../Transforms/SLPVectorizer/X86/cmp_sel.ll | 8 +- .../SLPVectorizer/X86/commutativity.ll | 6 +- .../SLPVectorizer/X86/compare-reduce.ll | 22 +- .../SLPVectorizer/X86/crash_cmpop.ll | 40 +-- .../X86/crash_exceed_scheduling.ll | 34 +- llvm/test/Transforms/SLPVectorizer/X86/cse.ll | 36 +- .../X86/extract-scalar-from-undef.ll | 9 +- .../SLPVectorizer/X86/extract_in_tree_user.ll | 30 +- .../X86/extractelement-multiple-uses.ll | 10 +- .../SLPVectorizer/X86/extractelement.ll | 20 +- .../SLPVectorizer/X86/horizontal-list.ll | 20 +- .../SLPVectorizer/X86/in-tree-user.ll | 24 +- .../SLPVectorizer/X86/insert-shuffle.ll | 7 +- .../X86/jumbled-load-multiuse.ll | 9 +- .../Transforms/SLPVectorizer/X86/lookahead.ll | 84 ++--- .../X86/matched-shuffled-entries.ll | 24 +- .../SLPVectorizer/X86/ordering-bug.ll | 22 +- .../Transforms/SLPVectorizer/X86/partail.ll | 35 +- .../SLPVectorizer/X86/phi-undef-input.ll | 12 +- .../SLPVectorizer/X86/reduction2.ll | 32 +- .../X86/remark_extract_broadcast.ll | 2 +- .../SLPVectorizer/X86/reorder_phi.ll | 36 +- .../X86/reorder_with_external_users.ll | 68 ++-- .../SLPVectorizer/X86/reused-undefs.ll 
| 4 +- .../X86/scatter-vectorize-reused-pointer.ll | 16 +- .../X86/vectorize-widest-phis.ll | 2 +- 40 files changed, 659 insertions(+), 733 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 53d5f67caff40..ba44d4a77ca3a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2267,14 +2267,13 @@ class BoUpSLP { /// Vectorize a single entry in the tree. Value *vectorizeTree(TreeEntry *E); - /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry - /// \p E. - Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx); + /// Vectorize a single entry in the tree, starting in \p VL. + Value *vectorizeTree(ArrayRef VL); /// Create a new vector from a list of scalar values. Produces a sequence /// which exploits values reused across lanes, and arranges the inserts /// for ease of later optimization. - Value *createBuildVector(const TreeEntry *E); + Value *createBuildVector(ArrayRef VL); /// \returns the scalarization cost for this type. Scalarization in this /// context means the creation of vectors from a group of scalars. If \p @@ -2377,12 +2376,6 @@ class BoUpSLP { return IsSame(Scalars, ReuseShuffleIndices); } - bool isOperandGatherNode(const EdgeInfo &UserEI) const { - return State == TreeEntry::NeedToGather && - UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx && - UserTreeIndices.front().UserTE == UserEI.UserTE; - } - /// \returns true if current entry has same operands as \p TE. bool hasEqualOperands(const TreeEntry &TE) const { if (TE.getNumOperands() != getNumOperands()) @@ -3917,22 +3910,17 @@ static bool isRepeatedNonIdentityClusteredMask(ArrayRef Mask, } void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { - // Reorder reuses mask. - reorderReuses(TE.ReuseShuffleIndices, Mask); + // For vectorized and non-clustered reused - just reorder reuses mask. 
const unsigned Sz = TE.Scalars.size(); - // For vectorized and non-clustered reused no need to do anything else. - if (TE.State != TreeEntry::NeedToGather || + if (TE.State != TreeEntry::NeedToGather || !TE.ReorderIndices.empty() || !ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices, Sz) || - !isRepeatedNonIdentityClusteredMask(TE.ReuseShuffleIndices, Sz)) + !isRepeatedNonIdentityClusteredMask(TE.ReuseShuffleIndices, Sz)) { + reorderReuses(TE.ReuseShuffleIndices, Mask); return; - SmallVector NewMask; - inversePermutation(TE.ReorderIndices, NewMask); - addMask(NewMask, TE.ReuseShuffleIndices); - // Clear reorder since it is going to be applied to the new mask. - TE.ReorderIndices.clear(); + } // Try to improve gathered nodes with clustered reuses, if possible. - reorderScalars(TE.Scalars, makeArrayRef(NewMask).slice(0, Sz)); + reorderScalars(TE.Scalars, makeArrayRef(TE.ReuseShuffleIndices).slice(0, Sz)); // Fill the reuses mask with the identity submasks. for (auto *It = TE.ReuseShuffleIndices.begin(), *End = TE.ReuseShuffleIndices.end(); @@ -8048,8 +8036,7 @@ class ShuffleInstructionBuilder { }; } // namespace -Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) { - ArrayRef VL = E->getOperand(NodeIdx); +Value *BoUpSLP::vectorizeTree(ArrayRef VL) { const unsigned VF = VL.size(); InstructionsState S = getSameOpcode(VL, *TLI); // Special processing for GEPs bundle, which may include non-gep values. 
@@ -8060,177 +8047,123 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) { S = getSameOpcode(*It, *TLI); } if (S.getOpcode()) { - if (TreeEntry *VE = getTreeEntry(S.OpValue); VE && VE->isSame(VL)) { - assert((any_of(VE->UserTreeIndices, - [E, NodeIdx](const EdgeInfo &EI) { - return EI.EdgeIdx == NodeIdx && EI.UserTE == E; - }) || - any_of(VectorizableTree, - [E, NodeIdx, VE](const std::unique_ptr &TE) { - return TE->isOperandGatherNode({E, NodeIdx}) && - VE->isSame(TE->Scalars); - })) && - "Expected same vectorizable node."); - Value *V = vectorizeTree(VE); - if (VF != cast(V->getType())->getNumElements()) { - if (!VE->ReuseShuffleIndices.empty()) { - // Reshuffle to get only unique values. - // If some of the scalars are duplicated in the vectorization - // tree entry, we do not vectorize them but instead generate a - // mask for the reuses. But if there are several users of the - // same entry, they may have different vectorization factors. - // This is especially important for PHI nodes. In this case, we - // need to adapt the resulting instruction for the user - // vectorization factor and have to reshuffle it again to take - // only unique elements of the vector. Without this code the - // function incorrectly returns reduced vector instruction with - // the same elements, not with the unique ones. - - // block: - // %phi = phi <2 x > { .., %entry} {%shuffle, %block} - // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0> - // ... 
(use %2) - // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0} - // br %block - SmallVector UniqueIdxs(VF, UndefMaskElem); - SmallSet UsedIdxs; - int Pos = 0; - for (int Idx : VE->ReuseShuffleIndices) { - if (Idx != static_cast(VF) && Idx != UndefMaskElem && - UsedIdxs.insert(Idx).second) - UniqueIdxs[Idx] = Pos; - ++Pos; + if (TreeEntry *E = getTreeEntry(S.OpValue)) + if (E->isSame(VL)) { + Value *V = vectorizeTree(E); + if (VF != cast(V->getType())->getNumElements()) { + if (!E->ReuseShuffleIndices.empty()) { + // Reshuffle to get only unique values. + // If some of the scalars are duplicated in the vectorization tree + // entry, we do not vectorize them but instead generate a mask for + // the reuses. But if there are several users of the same entry, + // they may have different vectorization factors. This is especially + // important for PHI nodes. In this case, we need to adapt the + // resulting instruction for the user vectorization factor and have + // to reshuffle it again to take only unique elements of the vector. + // Without this code the function incorrectly returns reduced vector + // instruction with the same elements, not with the unique ones. + + // block: + // %phi = phi <2 x > { .., %entry} {%shuffle, %block} + // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0> + // ... 
(use %2) + // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0} + // br %block + SmallVector UniqueIdxs(VF, UndefMaskElem); + SmallSet UsedIdxs; + int Pos = 0; + int Sz = VL.size(); + for (int Idx : E->ReuseShuffleIndices) { + if (Idx != Sz && Idx != UndefMaskElem && + UsedIdxs.insert(Idx).second) + UniqueIdxs[Idx] = Pos; + ++Pos; + } + assert(VF >= UsedIdxs.size() && "Expected vectorization factor " + "less than original vector size."); + UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem); + V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); + } else { + assert(VF < cast(V->getType())->getNumElements() && + "Expected vectorization factor less " + "than original vector size."); + SmallVector UniformMask(VF, 0); + std::iota(UniformMask.begin(), UniformMask.end(), 0); + V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); + } + if (auto *I = dyn_cast(V)) { + GatherShuffleExtractSeq.insert(I); + CSEBlocks.insert(I->getParent()); } - assert(VF >= UsedIdxs.size() && "Expected vectorization factor " - "less than original vector size."); - UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem); - V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle"); - } else { - assert(VF < cast(V->getType())->getNumElements() && - "Expected vectorization factor less " - "than original vector size."); - SmallVector UniformMask(VF, 0); - std::iota(UniformMask.begin(), UniformMask.end(), 0); - V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); - } - if (auto *I = dyn_cast(V)) { - GatherShuffleExtractSeq.insert(I); - CSEBlocks.insert(I->getParent()); } + return V; } - return V; - } } - // Find the corresponding gather entry and vectorize it. - // Allows to be more accurate with tree/graph transformations, checks for the - // correctness of the transformations in many cases. 
- auto *I = find_if(VectorizableTree, - [E, NodeIdx](const std::unique_ptr &TE) { - return TE->isOperandGatherNode({E, NodeIdx}); - }); - assert(I != VectorizableTree.end() && "Gather node is not in the graph."); - assert(I->get()->UserTreeIndices.size() == 1 && - "Expected only single user for the gather node."); - assert(I->get()->isSame(VL) && "Expected same list of scalars."); - return vectorizeTree(I->get()); + // Can't vectorize this, so simply build a new vector with each lane + // corresponding to the requested value. + return createBuildVector(VL); } - -Value *BoUpSLP::createBuildVector(const TreeEntry *E) { - assert(E->State == TreeEntry::NeedToGather && "Expected gather node."); - unsigned VF = E->getVectorFactor(); - - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, - CSEBlocks); - SmallVector Gathered( - VF, PoisonValue::get(E->Scalars.front()->getType())); - bool NeedFreeze = false; - SmallVector VL(E->Scalars.begin(), E->Scalars.end()); - // Build a mask out of the redorder indices and reorder scalars per this mask. - SmallVector ReorderMask; - inversePermutation(E->ReorderIndices, ReorderMask); - if (!ReorderMask.empty()) - reorderScalars(VL, ReorderMask); - if (!allConstant(VL)) { - // For splats with can emit broadcasts instead of gathers, so try to find - // such sequences. - bool IsSplat = isSplat(VL) && (VL.size() > 2 || VL.front() == VL.back()); - SmallVector ReuseMask(VF, UndefMaskElem); - SmallVector UndefPos; +Value *BoUpSLP::createBuildVector(ArrayRef VL) { + assert(any_of(VectorizableTree, + [VL](const std::unique_ptr &TE) { + return TE->State == TreeEntry::NeedToGather && TE->isSame(VL); + }) && + "Non-matching gather node."); + unsigned VF = VL.size(); + // Exploit possible reuse of values across lanes. + SmallVector ReuseShuffleIndicies; + SmallVector UniqueValues; + if (VL.size() > 2) { DenseMap UniquePositions; - // Gather unique non-const values and all constant values. 
- // For repeated values, just shuffle them. - for (auto [I, V] : enumerate(VL)) { + unsigned NumValues = + std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) { + return !isa(V); + }).base()); + VF = std::max(VF, PowerOf2Ceil(NumValues)); + int UniqueVals = 0; + for (Value *V : VL.drop_back(VL.size() - VF)) { if (isa(V)) { - if (!isa(V)) { - Gathered[I] = V; - ReuseMask[I] = I; - UndefPos.push_back(I); - } + ReuseShuffleIndicies.emplace_back(UndefMaskElem); continue; } if (isConstant(V)) { - Gathered[I] = V; - ReuseMask[I] = I; + ReuseShuffleIndicies.emplace_back(UniqueValues.size()); + UniqueValues.emplace_back(V); continue; } - if (IsSplat) { - Gathered.front() = V; - ReuseMask[I] = 0; - } else { - const auto Res = UniquePositions.try_emplace(V, I); - Gathered[Res.first->second] = V; - ReuseMask[I] = Res.first->second; - } - } - if (!UndefPos.empty() && IsSplat) { - // For undef values, try to replace them with the simple broadcast. - // We can do it if the broadcasted value is guaranteed to be - // non-poisonous, or by freezing the incoming scalar value first. - auto *It = find_if(Gathered, [this, E](Value *V) { - return !isa(V) && - (getTreeEntry(V) || isGuaranteedNotToBePoison(V) || - any_of(V->uses(), [E](const Use &U) { - // Check if the value already used in the same operation in - // one of the nodes already. - return E->UserTreeIndices.size() == 1 && - is_contained( - E->UserTreeIndices.front().UserTE->Scalars, - U.getUser()) && - E->UserTreeIndices.front().EdgeIdx != U.getOperandNo(); - })); - }); - if (It != Gathered.end()) { - // Replace undefs by the non-poisoned scalars and emit broadcast. - int Pos = std::distance(Gathered.begin(), It); - for_each(UndefPos, [&](int I) { - // Set the undef position to the non-poisoned scalar. - ReuseMask[I] = Pos; - // Replace the undef by the poison, in the mask it is replaced by non-poisoned scalar already. 
- if (I != Pos) - Gathered[I] = PoisonValue::get(Gathered[I]->getType()); - }); - } else { - // Replace undefs by the poisons, emit broadcast and then emit - // freeze. - for_each(UndefPos, [&](int I) { - ReuseMask[I] = UndefMaskElem; - if (isa(Gathered[I])) - Gathered[I] = PoisonValue::get(Gathered[I]->getType()); - }); - NeedFreeze = true; + auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); + ReuseShuffleIndicies.emplace_back(Res.first->second); + if (Res.second) { + UniqueValues.emplace_back(V); + ++UniqueVals; } } - ShuffleBuilder.addMask(ReuseMask); - } else { - copy(VL, Gathered.begin()); - } - // Gather unique scalars and all constants. - Value *Vec = gather(Gathered); - ShuffleBuilder.addMask(E->ReuseShuffleIndices); - Vec = ShuffleBuilder.finalize(Vec); - if (NeedFreeze) - Vec = Builder.CreateFreeze(Vec); + if (UniqueVals == 1 && UniqueValues.size() == 1) { + // Emit pure splat vector. + ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(), + UndefMaskElem); + } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) { + if (UniqueValues.empty()) { + assert(all_of(VL, UndefValue::classof) && "Expected list of undefs."); + NumValues = VF; + } + ReuseShuffleIndicies.clear(); + UniqueValues.clear(); + UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); + } + UniqueValues.append(VF - UniqueValues.size(), + PoisonValue::get(VL[0]->getType())); + VL = UniqueValues; + } + + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, + CSEBlocks); + Value *Vec = gather(VL); + if (!ReuseShuffleIndicies.empty()) { + ShuffleBuilder.addMask(ReuseShuffleIndicies); + Vec = ShuffleBuilder.finalize(Vec); + } return Vec; } @@ -8247,13 +8180,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq, CSEBlocks); if (E->State == TreeEntry::NeedToGather) { - if (E->Idx > 0) { - // We are in the middle of a vectorizable chain. 
We need to gather the - // scalars from the users. - Value *Vec = createBuildVector(E); - E->VectorizedValue = Vec; - return Vec; - } if (E->getMainOp()) setInsertPointAfterBundle(E); Value *Vec; @@ -8330,7 +8256,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Builder.SetInsertPoint(IBB->getTerminator()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); - Value *Vec = vectorizeOperand(E, i); + Value *Vec = vectorizeTree(E->getOperand(i)); NewPhi->addIncoming(Vec, IBB); } @@ -8364,7 +8290,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::InsertElement: { assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique"); Builder.SetInsertPoint(cast(E->Scalars.back())); - Value *V = vectorizeOperand(E, 1); + Value *V = vectorizeTree(E->getOperand(1)); // Create InsertVector shuffle if necessary auto *FirstInsert = cast(*find_if(E->Scalars, [E](Value *V) { @@ -8470,7 +8396,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::BitCast: { setInsertPointAfterBundle(E); - Value *InVec = vectorizeOperand(E, 0); + Value *InVec = vectorizeTree(E->getOperand(0)); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8491,8 +8417,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::ICmp: { setInsertPointAfterBundle(E); - Value *L = vectorizeOperand(E, 0); - Value *R = vectorizeOperand(E, 1); + Value *L = vectorizeTree(E->getOperand(0)); + Value *R = vectorizeTree(E->getOperand(1)); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8513,9 +8439,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Select: { setInsertPointAfterBundle(E); - Value *Cond = vectorizeOperand(E, 0); - Value *True = vectorizeOperand(E, 1); - Value *False = vectorizeOperand(E, 2); + Value *Cond = vectorizeTree(E->getOperand(0)); + Value *True = vectorizeTree(E->getOperand(1)); + Value *False = vectorizeTree(E->getOperand(2)); if 
(E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8534,7 +8460,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::FNeg: { setInsertPointAfterBundle(E); - Value *Op = vectorizeOperand(E, 0); + Value *Op = vectorizeTree(E->getOperand(0)); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8576,8 +8502,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Xor: { setInsertPointAfterBundle(E); - Value *LHS = vectorizeOperand(E, 0); - Value *RHS = vectorizeOperand(E, 1); + Value *LHS = vectorizeTree(E->getOperand(0)); + Value *RHS = vectorizeTree(E->getOperand(1)); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -8624,7 +8550,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } } else { assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state"); - Value *VecPtr = vectorizeOperand(E, 0); + Value *VecPtr = vectorizeTree(E->getOperand(0)); // Use the minimum alignment of the gathered loads. 
Align CommonAlignment = LI->getAlign(); for (Value *V : E->Scalars) @@ -8647,7 +8573,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E); - Value *VecValue = vectorizeOperand(E, 0); + Value *VecValue = vectorizeTree(E->getOperand(0)); ShuffleBuilder.addMask(E->ReorderIndices); VecValue = ShuffleBuilder.finalize(VecValue); @@ -8678,11 +8604,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { auto *GEP0 = cast(VL0); setInsertPointAfterBundle(E); - Value *Op0 = vectorizeOperand(E, 0); + Value *Op0 = vectorizeTree(E->getOperand(0)); SmallVector OpVecs; for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) { - Value *OpVec = vectorizeOperand(E, J); + Value *OpVec = vectorizeTree(E->getOperand(J)); OpVecs.push_back(OpVec); } @@ -8736,7 +8662,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { continue; } - Value *OpVec = vectorizeOperand(E, j); + Value *OpVec = vectorizeTree(E->getOperand(j)); LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (isVectorIntrinsicWithOverloadTypeAtArg(IID, j)) @@ -8791,11 +8717,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *LHS = nullptr, *RHS = nullptr; if (Instruction::isBinaryOp(E->getOpcode()) || isa(VL0)) { setInsertPointAfterBundle(E); - LHS = vectorizeOperand(E, 0); - RHS = vectorizeOperand(E, 1); + LHS = vectorizeTree(E->getOperand(0)); + RHS = vectorizeTree(E->getOperand(1)); } else { setInsertPointAfterBundle(E); - LHS = vectorizeOperand(E, 0); + LHS = vectorizeTree(E->getOperand(0)); } if (E->VectorizedValue) { diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll index 9889d8d9e444a..967a4dca29ce5 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/matmul.ll @@ -25,42 +25,42 @@ define void @wrap_mul4(double* nocapture %Out, [2 x double]* nocapture readonly ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* 
[[ARRAYIDX3_I]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TEMP]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>* -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[TEMP]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7_I]] to <2 x double>* +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[TEMP2]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[TEMP2]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[OUT:%.*]] to <2 x double>* ; CHECK-NEXT: [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 2 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>* -; CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[TMP11]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x 
double> [[SHUFFLE]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[ARRAYIDX30_I]] to <2 x double>* -; CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[TMP14]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[SHUFFLE1]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP13]], [[TMP16]] -; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[TMP18]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[ARRAYIDX25_I]] to <2 x double>* +; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, <2 x double>* [[TMP13]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x double> [[TMP4]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[ARRAYIDX30_I]] to <2 x double>* +; CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[TMP16]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP9]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[TMP15]], [[TMP18]] +; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[RES_I_SROA_5_0_OUT2_I_SROA_IDX4]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP19]], <2 x double>* [[TMP20]], align 8 ; CHECK-NEXT: [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0 -; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE4]] -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0 -; CHECK-NEXT: [[SHUFFLE5:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> poison, <2 x i32> zeroinitializer -; 
CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE5]] -; CHECK-NEXT: [[TMP23:%.*]] = fadd <2 x double> [[TMP20]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP23]], <2 x double>* [[TMP24]], align 8 -; CHECK-NEXT: [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 6 -; CHECK-NEXT: [[TMP25:%.*]] = fmul <2 x double> [[TMP12]], [[SHUFFLE4]] -; CHECK-NEXT: [[TMP26:%.*]] = fmul <2 x double> [[TMP15]], [[SHUFFLE5]] -; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[TMP25]], [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]] to <2 x double>* +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> poison, double [[TEMP10]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[TMP21]], double [[TEMP10]], i32 1 +; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[TMP2]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x double> poison, double [[TEMP11]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x double> [[TMP24]], double [[TEMP11]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = fmul <2 x double> [[TMP7]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = fadd <2 x double> [[TMP23]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast double* [[RES_I_SROA_7_0_OUT2_I_SROA_IDX8]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[TMP28]], align 8 +; CHECK-NEXT: [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 6 +; CHECK-NEXT: [[TMP29:%.*]] = fmul <2 x double> [[TMP14]], [[TMP22]] +; CHECK-NEXT: [[TMP30:%.*]] = fmul <2 x double> [[TMP17]], [[TMP25]] +; CHECK-NEXT: [[TMP31:%.*]] = fadd <2 x double> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = bitcast double* [[RES_I_SROA_9_0_OUT2_I_SROA_IDX12]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[TMP32]], align 
8 ; CHECK-NEXT: ret void ; %arrayidx1.i = getelementptr inbounds [2 x double], [2 x double]* %A, i64 0, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll index 3327cb0e51a8e..9a0b68f0dc640 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll @@ -10,18 +10,18 @@ define void @slp_not_profitable_with_fast_fmf(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[SHUFFLE1]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: 
[[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -50,18 +50,18 @@ define void @slp_not_profitable_with_reassoc_fmf(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE1]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul reassoc <2 x float> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP6:%.*]] = fsub reassoc <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd reassoc <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], 
i32 1 -; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul reassoc <2 x float> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub reassoc <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd reassoc <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -91,18 +91,18 @@ define void @slp_profitable_missing_fmf_on_fadd_fsub(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[SHUFFLE1]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x 
float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -132,18 +132,18 @@ define void @slp_profitable_missing_fmf_on_fmul_fadd_fsub(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE1]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> 
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = getelementptr inbounds float, ptr %B, i64 1 @@ -173,18 +173,18 @@ define void @slp_profitable_missing_fmf_nnans_only(ptr %A, ptr %B) { ; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], 
i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul nnan <2 x float> [[SHUFFLE1]], [[TMP1]] -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul nnan <2 x float> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP6:%.*]] = fsub nnan <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd nnan <2 x float> [[TMP5]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[A]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 -; CHECK-NEXT: store float [[TMP9]], ptr [[B]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul nnan <2 x float> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan <2 x float> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub nnan <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd nnan <2 x float> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> +; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4 ; CHECK-NEXT: ret void ; %gep.B.1 = 
getelementptr inbounds float, ptr %B, i64 1 @@ -267,16 +267,16 @@ define void @slp_profitable(ptr %A, ptr %B, float %0) { ; CHECK-NEXT: [[SUB_I1096:%.*]] = fsub fast float 1.000000e+00, [[TMP0:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[SUB_I1096]], i32 0 -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[TMP1]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[SHUFFLE1]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <2 x float> [[SHUFFLE1]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <2 x i32> -; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[SUB_I1096]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[SUB_I1096]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[SHUFFLE]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x float> [[SHUFFLE]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> 
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[B:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll index d9f33c2b5b6f8..9f2bf46d6f0d0 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/splat-loads.ll @@ -16,14 +16,14 @@ define void @splat_loads_double(double *%array1, double *%array2, double *%ptrA, ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* +; CHECK-NEXT: store 
<2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -63,14 +63,14 @@ define void @splat_loads_float(float *%array1, float *%array2, float *%ptrA, flo ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[LD_2_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[LD_2_1]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP1]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP6]], <2 x float>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[LD_2_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[LD_2_1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[LD_2_1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[GEP_1_0]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -110,14 +110,14 @@ define void @splat_loads_i64(i64 *%array1, i64 *%array2, i64 *%ptrA, i64 *%ptrB) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: 
[[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_1]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[TMP1]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[LD_2_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LD_2_1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[LD_2_1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[GEP_1_0]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -157,14 +157,14 @@ define void @splat_loads_i32(i32 *%array1, i32 *%array2, i32 *%ptrA, i32 *%ptrB) ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_1]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 
x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[LD_2_0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LD_2_1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[LD_2_1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i32> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[GEP_1_0]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll index b96862f0d0b21..64c1af4c3035d 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -19,22 +19,23 @@ define void @s116_modified(float* %a) { ; CHECK-LABEL: @s116_modified( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2 +; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 4 +; CHECK-NEXT: [[LD1:%.*]] = load float, float* [[GEP1]], align 4 ; CHECK-NEXT: [[LD0:%.*]] = load float, float* [[GEP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP2]] to <2 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x 
float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>* -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x float> [[TMP9]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[GEP0]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP12]], align 4 +; CHECK-NEXT: [[LD4:%.*]] = load float, float* [[GEP4]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[LD4]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[LD1]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[LD1]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[GEP0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], 
align 4 ; CHECK-NEXT: ret void ; %gep0 = getelementptr inbounds float, float* %a, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 7eedbe98d4fa9..f1241370626fb 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -518,10 +518,10 @@ define void @select_uniform_eq_2xi32(i32* %ptr, i32 %x) { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[PTR]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[X]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[PTR]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 2 ; CHECK-NEXT: ret void ; entry: @@ -585,10 +585,10 @@ define void @select_uniform_ne_2xi64(i64* %ptr, i64 %x) { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[SHUFFLE]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast 
i64* [[PTR]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[X]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[PTR]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 2 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll index 2b792da557188..01ba01ecc2bcb 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -60,11 +60,11 @@ define void @extracts_first_2_lanes_different_vectors(<2 x double>* %ptr.1, <4 x ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_0]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V3_LANE_1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V3_LANE_1]]) -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -102,12 +102,12 @@ define void @noop_extract_second_2_lanes(<4 x double>* %ptr.1, <4 x double>* %pt ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> 
poison, double [[V1_LANE_2]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_3]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -139,14 +139,14 @@ define void @extract_reverse_order(<2 x double>* %ptr.1, <4 x double>* %ptr.2) { ; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, <4 x double>* [[PTR_2:%.*]], align 16 ; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[V_1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[V_1]], i32 0 -; CHECK-NEXT: call void @use(double [[TMP3]]) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V2_LANE_2]], i32 1 +; CHECK-NEXT: 
[[TMP2:%.*]] = fmul <2 x double> [[V_1]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[V_1]], i32 0 ; CHECK-NEXT: call void @use(double [[TMP4]]) -; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[V_1]], i32 1 +; CHECK-NEXT: call void @use(double [[TMP5]]) +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -182,12 +182,12 @@ define void @extract_lanes_1_and_2(<4 x double>* %ptr.1, <4 x double>* %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V1_LANE_1]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1_LANE_2]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2_LANE_2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) -; CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: @@ -288,15 +288,15 @@ define void @extracts_jumbled_4_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_3]], i32 3 ; CHECK-NEXT: 
[[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_0]], i32 3 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_2]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[V2_LANE_0]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP3]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) -; CHECK-NEXT: store <9 x double> [[TMP8]], <9 x double>* [[PTR_1]], align 8 +; CHECK-NEXT: store <9 x double> [[TMP9]], <9 x double>* [[PTR_1]], align 8 ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll index d4ada814be303..7ab2df33692ef 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll @@ -115,10 +115,10 @@ define amdgpu_kernel void @mul_scalar_v2f16(half addrspace(3)* %a, half %scalar, ; GCN-NEXT: [[TMP1:%.*]] = bitcast half addrspace(3)* [[A:%.*]] to <2 x half> addrspace(3)* ; GCN-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half> addrspace(3)* [[TMP1]], align 2 ; GCN-NEXT: [[TMP3:%.*]] = insertelement <2 x half> poison, half [[SCALAR:%.*]], i32 0 -; GCN-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x half> 
[[TMP3]], <2 x half> poison, <2 x i32> zeroinitializer -; GCN-NEXT: [[TMP4:%.*]] = fmul <2 x half> [[TMP2]], [[SHUFFLE]] -; GCN-NEXT: [[TMP5:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)* -; GCN-NEXT: store <2 x half> [[TMP4]], <2 x half> addrspace(3)* [[TMP5]], align 2 +; GCN-NEXT: [[TMP4:%.*]] = insertelement <2 x half> [[TMP3]], half [[SCALAR]], i32 1 +; GCN-NEXT: [[TMP5:%.*]] = fmul <2 x half> [[TMP2]], [[TMP4]] +; GCN-NEXT: [[TMP6:%.*]] = bitcast half addrspace(3)* [[C:%.*]] to <2 x half> addrspace(3)* +; GCN-NEXT: store <2 x half> [[TMP5]], <2 x half> addrspace(3)* [[TMP6]], align 2 ; GCN-NEXT: ret void ; %i0 = load half, half addrspace(3)* %a, align 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll index d6e016e62266e..6c2cd2aa43c4a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll @@ -9,17 +9,17 @@ define { i64, i64 } @patatino(double %arg) { ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[ARG:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP0]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = fptosi <2 x double> [[TMP6]] to <2 x i32> -; 
CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i32> [[TMP7]] to <2 x i64> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0 -; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1 -; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: [[T16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 +; CHECK-NEXT: [[T17:%.*]] = insertvalue { i64, i64 } [[T16]], i64 [[TMP11]], 1 ; CHECK-NEXT: ret { i64, i64 } [[T17]] ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll index 2f0bcfc4b5d85..cb50607383cd7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -6,17 +6,17 @@ define void @Test(i32) { ; CHECK-LABEL: @Test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFFLE7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer ; 
CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> poison, i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE7:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFFLE6:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP14:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[SHUFFLE7]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[SHUFFLE8]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[SHUFFLE6]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[SHUFFLE7]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[TMP8]] @@ -25,10 +25,10 @@ define void @Test(i32) { ; CHECK-NEXT: [[OP_RDX4:%.*]] = and i32 [[OP_RDX2]], [[OP_RDX3]] ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> , i32 [[OP_RDX4]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[SHUFFLE6:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP9]], [[SHUFFLE6]] -; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP9]], [[SHUFFLE6]] -; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> +; 
CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i32> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP14]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> ; CHECK-NEXT: br label [[LOOP]] ; ; FORCE_REDUCTION-LABEL: @Test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll index 6fda6be46eec2..c7a8392defc14 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll @@ -5,9 +5,9 @@ define i16 @test(i16 %call37) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = load i16, i16* undef, align 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL37:%.*]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> , i16 [[CALL37:%.*]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[SHUFFLE]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <8 x i16> [[SHUFFLE]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll index e3e6910b931ee..4eeb422caabaa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll @@ -19,9 +19,8 @@ define void @bcast_long(i32 *%A, i32 *%S) { ; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0 ; 
CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = freeze <8 x i32> [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* -; CHECK-NEXT: store <8 x i32> [[TMP1]], <8 x i32>* [[TMP2]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>* +; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP1]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll index 79a4054b63a2b..0f15a06eb5ab7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -46,11 +46,11 @@ define void @test(float %a) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float [[A]], i32 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> zeroinitializer, [[SHUFFLE]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll index 6b104f33e0c6a..7e241144afc53 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll @@ -71,19 +71,19 @@ define i32 
@ray_sphere(ptr nocapture noundef readonly %sph, ptr nocapture nounde ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x double> [[TMP28]], double [[TMP12]], i32 1 ; CHECK-NEXT: [[TMP30:%.*]] = fsub <2 x double> [[TMP27]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x double> poison, double [[MUL88]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP32:%.*]] = fdiv <2 x double> [[TMP30]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP32]], i32 1 -; CHECK-NEXT: [[CMP93:%.*]] = fcmp olt double [[TMP33]], 0x3EB0C6F7A0B5ED8D -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP32]], i32 0 -; CHECK-NEXT: [[CMP94:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[TMP31]], double [[MUL88]], i32 1 +; CHECK-NEXT: [[TMP33:%.*]] = fdiv <2 x double> [[TMP30]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP33]], i32 1 +; CHECK-NEXT: [[CMP93:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i32 0 +; CHECK-NEXT: [[CMP94:%.*]] = fcmp olt double [[TMP35]], 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP93]], i1 [[CMP94]], i1 false ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP35:%.*]] = fcmp ule <2 x double> [[TMP32]], -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP35]], i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP35]], i32 1 -; CHECK-NEXT: [[OR_COND106:%.*]] = select i1 [[TMP37]], i1 true, i1 [[TMP36]] +; CHECK-NEXT: [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1 +; CHECK-NEXT: [[OR_COND106:%.*]] = 
select i1 [[TMP38]], i1 true, i1 [[TMP37]] ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = zext i1 [[OR_COND106]] to i32 ; CHECK-NEXT: br label [[CLEANUP]] ; CHECK: cleanup: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll index 600cf2539ced4..bdbcc5cb51b8f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll @@ -14,10 +14,10 @@ define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, doub ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[G]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[TMP4]], <2 x double> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll index abe08f7c693bb..67ca7282b002d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -97,9 +97,9 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) { ; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> 
[[TMP2]], i32 [[B:%.*]], i32 1 ; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2 -; AVX-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> -; AVX-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[SHUFFLE2]] -; AVX-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3 +; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]] +; AVX-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; AVX-NEXT: ret void ; %add1 = add i32 %c, %a diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll index 10e1d9123a7f6..c1434c78129ab 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -11,20 +11,20 @@ define void @reduce_compare(double* nocapture %A, i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul 
<2 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] ; CHECK-NEXT: br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index d9655b7444005..9e981230e5862 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -12,32 +12,32 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ACC1_056:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD13:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[SRC:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store float [[ACC1_056]], float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[SHUFFLE]] -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x float> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP5]], <2 x float> -; 
CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x float> , <2 x float> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP10]], i32 1 -; CHECK-NEXT: [[ADD13]] = fadd float [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[ADD13]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x float> [[TMP14]], <2 x float> -; CHECK-NEXT: [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP16]], -; CHECK-NEXT: [[TMP18]] = select <2 x i1> [[TMP17]], <2 x float> , <2 x float> [[TMP16]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP0]], [[TMP3]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <2 x float> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP6]], <2 x float> +; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <2 x float> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x float> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP9]], <2 x float> , <2 x float> [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 +; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> 
poison, <2 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], +; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> +; CHECK-NEXT: [[TMP18:%.*]] = fcmp olt <2 x float> [[TMP17]], +; CHECK-NEXT: [[TMP19]] = select <2 x i1> [[TMP18]], <2 x float> , <2 x float> [[TMP17]] ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 65817d74ee54f..7e92b130d307b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -5,19 +5,19 @@ define void @exceed(double %0, double %1) { ; CHECK-LABEL: @exceed( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP5]], undef +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] +; 
CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef ; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP5]], undef +; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef ; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef @@ -27,16 +27,16 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef ; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP6]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 -; 
CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP4]], <2 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[TMP12]], undef +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] ; CHECK-NEXT: ] @@ -45,7 +45,7 @@ define void @exceed(double %0, double %1) { ; CHECK: bb2: ; CHECK-NEXT: br label [[LABEL]] ; CHECK: label: -; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x double> [ [[TMP10]], [[BB1]] ], [ [[TMP13]], [[BB2]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP15]], [[BB2]] ] ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll index 3e7896b5b4f4e..c8d073fa243b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll @@ -246,22 +246,22 @@ define i32 @partial_mrg(double* nocapture %A, i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[A]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A]] to 
<2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], 4 ; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[N]], 4 ; CHECK-NEXT: [[CONV12:%.*]] = sitofp i32 [[ADD]] to double -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP2]], double [[CONV12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[ARRAYIDX7]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret i32 0 @@ -352,18 +352,18 @@ define void @cse_for_hoisted_instructions_in_preheader(i32* %dst, i32 %a, i1 %c) ; CHECK-LABEL: @cse_for_hoisted_instructions_in_preheader( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A]], i32 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> , 
[[SHUFFLE]] +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> , [[TMP1]] ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[GEP_0]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP1]], ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 10 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[GEP_2]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll index c3b98765d6e45..da4c2424066c5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -6,10 +6,11 @@ define i64 @foo(i32 %tmp7) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[TMP7:%.*]], i32 2 ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 6 -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> , [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> , [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> 
[[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> , [[SHUFFLE]] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll index 0c4ff04209f04..ec36710fdb3a5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll @@ -11,12 +11,12 @@ define i32 @fn1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* 
[[TMP6]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -94,15 +94,15 @@ define void @externally_used_ptrs() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64*> [[TMP1]], <2 x i64*> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, <2 x i64*> [[SHUFFLE]], <2 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint <2 x i64*> [[TMP2]] to <2 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64*> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll index 33959dc48f296..fc1ab867faa54 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multiple-uses.ll @@ -16,11 
+16,11 @@ define float @multi_uses(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @multi_uses( ; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[Y1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll index e24d3a6c6776e..2d8707ea68c2d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll @@ -84,21 +84,21 @@ define float @f_used_twice_in_tree(<2 x float> %x) { ; THRESH1-LABEL: @f_used_twice_in_tree( ; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 ; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH1-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; THRESH1-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] -; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; THRESH1-NEXT: 
[[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] +; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] +; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] ; THRESH1-NEXT: ret float [[ADD]] ; ; THRESH2-LABEL: @f_used_twice_in_tree( ; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1 ; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0 -; THRESH2-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer -; THRESH2-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]] -; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 -; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 -; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]] +; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 +; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]] +; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]] ; THRESH2-NEXT: ret float [[ADD]] ; %x0 = extractelement <2 x float> %x, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index cd113e89ada0e..b15d3f70b2317 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -769,11 +769,11 @@ define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) { ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> 
[[TMP1]]) ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[CONV]], i32 0 -; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> zeroinitializer -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast <2 x float> [[TMP3]], [[SHUFFLE]] -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP6]], [[TMP7]] +; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[CONV]], i32 1 +; THRESHOLD-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[TMP3]], [[TMP5]] +; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 +; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP7]], [[TMP8]] ; THRESHOLD-NEXT: ret float [[OP_RDX2]] ; entry: @@ -897,11 +897,11 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 ; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[CONVC]], i32 1 ; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[CONV]], i32 0 -; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> zeroinitializer -; THRESHOLD-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> [[TMP4]], [[SHUFFLE]] -; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 -; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 -; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP7]], [[TMP8]] +; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1 +; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> 
[[TMP4]], [[TMP6]] +; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0 +; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1 +; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], 3.000000e+00 ; THRESHOLD-NEXT: ret float [[OP_RDX3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll index 36c1a8d4fc071..e703928a8077a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll @@ -12,21 +12,21 @@ define void @in_tree_user(double* nocapture %A, i32 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[CONV]], i32 1 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[SHUFFLE]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; 
CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP8]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] ; CHECK-NEXT: br i1 [[CMP11]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll index d301ff8734cc0..3cc92d2030e66 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -12,9 +12,10 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> , float [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], 
i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], undef ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], undef ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll index b626a2f84cd3d..225155d93cb58 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll @@ -9,11 +9,10 @@ define i32 @fn1() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SHUFFLE]], <4 x i32> -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index 168ffb62cdcc0..e25361f7737d5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -444,14 +444,14 @@ define void @ChecksExtractScores(double* %storeArray, double* %array, <2 x doubl ; CHECK-NEXT: [[LOADVEC2:%.*]] = load <2 x double>, <2 x double>* [[VECPTR2:%.*]], align 4 ; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[LOADVEC]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[LOADVEC2]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[LOADA0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[LOADVEC]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[LOADA1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[LOADVEC2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 ; CHECK-NEXT: ret void ; %idx0 = getelementptr inbounds double, double* %array, i64 0 @@ 
-669,16 +669,16 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double* ; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[EXTRA1]], i32 1 ; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[LOADA0]], i32 0 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP2]], [[SHUFFLE]] -; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 -; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[EXTRB1]], i32 1 -; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[SHUFFLE1]] -; AVX-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP4]], [[TMP8]] -; AVX-NEXT: [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* -; AVX-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 +; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[LOADA0]], i32 1 +; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] +; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0 +; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1 +; AVX-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[LOADA1]], i32 0 +; AVX-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[LOADA1]], i32 1 +; AVX-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP7]], [[TMP9]] +; AVX-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP5]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = bitcast double* [[SIDX0]] to <2 x double>* +; AVX-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 
; AVX-NEXT: ret void ; %idx0 = getelementptr inbounds double, double* %array, i64 0 @@ -739,15 +739,15 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub ; AVX-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] -; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] -; AVX-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]] +; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1 +; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1 +; AVX-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]] +; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; AVX-NEXT: [[ADD3:%.*]] = fadd double [[TMP9]], [[TMP10]] ; AVX-NEXT: ret double [[ADD3]] ; entry: @@ -789,11 +789,11 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2, ; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x 
double> [[TMP1]], [[SHUFFLE]] ; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] ; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]] -; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer -; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE1]] -; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]] +; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]] ; SSE-NEXT: ret double [[RES]] ; ; AVX-LABEL: @splat_loads_with_internal_uses( @@ -806,16 +806,16 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2, ; AVX-NEXT: [[TMP0:%.*]] = bitcast double* [[GEP_1_0]] to <2 x double>* ; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; AVX-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD_2_0]], i32 0 -; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]] -; AVX-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 -; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer -; AVX-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE1]] -; AVX-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP5]] -; AVX-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP6]], [[SHUFFLE]] -; AVX-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], 
i32 0 -; AVX-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP8]], [[TMP9]] +; AVX-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD_2_0]], i32 1 +; AVX-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] +; AVX-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD_2_1]], i32 0 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD_2_1]], i32 1 +; AVX-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP1]], [[TMP6]] +; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP4]], [[TMP7]] +; AVX-NEXT: [[TMP9:%.*]] = fsub <2 x double> [[TMP8]], [[TMP3]] +; AVX-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 +; AVX-NEXT: [[RES:%.*]] = fadd double [[TMP10]], [[TMP11]] ; AVX-NEXT: ret double [[RES]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index 118372d2d5898..e6cad5c9a88c6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -10,18 +10,18 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef ; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> , i32 [[SUB102_1]], i32 4 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 6 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, 
<16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> , i32 [[SUB86_1]], i32 4 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 5 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 6 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 7 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 12 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[ADD94_1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll 
index 604b833197893..3b75850b87082 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll @@ -17,21 +17,21 @@ define void @f(i1 %x) #0 { ; CHECK-NEXT: [[ICMP_A1:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @b to <2 x i64>*), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i1> poison, i1 [[ICMP_A1]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[SHUFFLE]], <2 x i64> [[TMP2]], <2 x i64> [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i1> [[TMP3]], i1 [[ICMP_A1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i64> [[TMP2]], <2 x i64> [[TMP0]] ; CHECK-NEXT: br label [[WHILE_END]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_BODY_LR_PH]] ] -; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @c to <2 x i64>*), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; CHECK-NEXT: [[ICMP_D0:%.*]] = icmp eq i64 [[TMP7]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP5]], [[WHILE_BODY_LR_PH]] ] +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (%struct.a* @c to <2 x i64>*), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[ICMP_D0:%.*]] = icmp eq i64 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[ICMP_D0]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP7]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP5]], <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i64> [[TMP9]], [[TMP6]] -; CHECK-NEXT: store <2 x i64> 
[[TMP10]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 +; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll index a7b494e007cc4..b7fb6c0176127 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -17,24 +17,23 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i32> [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP4]], <4 x i32> undef -; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i32> [[TMP6]] to <4 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, 
i16** undef, i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]] +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 +; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]] ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll index 88f75c37846ef..2a7e6d6697061 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll @@ -14,7 +14,7 @@ define i32 @phi3UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -51,7 +51,7 @@ define i32 @phi2UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -88,7 +88,7 @@ define i32 @phi1UndefInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -126,7 +126,7 @@ define i32 @phi1Undef1PoisonInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %ar ; CHECK-NEXT: [[TMP3:%.*]] = 
insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -164,7 +164,7 @@ define i32 @phi1Undef2PoisonInputs(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 %a ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] @@ -201,7 +201,7 @@ define i32 @phi1Undef1PoisonGapInput(i1 %cond, i8 %arg0, i8 %arg1, i8 %arg2, i8 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: ret i32 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll index fcea56b282fd6..fdc8b10100572 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -94,19 +94,19 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: 
[[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = fdiv <2 x double> [[TMP4]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP7]], 0x3EB0C6F7A0B5ED8D -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[TMP8]], 0x3EB0C6F7A0B5ED8D +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt double [[TMP9]], 0x3EB0C6F7A0B5ED8D ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[TMP9:%.*]] = fcmp ule <2 x double> [[TMP6]], -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: [[NOT_OR_COND9:%.*]] = or i1 [[TMP11]], [[TMP12]] ; CHECK-NEXT: ret i1 [[NOT_OR_COND9]] ; CHECK: cleanup: ; CHECK-NEXT: ret i1 false @@ -143,12 +143,12 @@ define i1 @fcmp_lt(double 
%a, double %b, double %c) { ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = fdiv <2 x double> [[TMP5]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP8:%.*]] = fcmp uge <2 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[MUL]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 +; CHECK-NEXT: [[NOT_OR_COND:%.*]] = or i1 [[TMP10]], [[TMP11]] ; CHECK-NEXT: ret i1 [[NOT_OR_COND]] ; %fneg = fneg double %b diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index 873948c9596f5..d25c77ca34841 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -9,7 +9,7 @@ define void @fextr(i16* %ptr) { ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] ; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* ; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll index c43753d995a77..591537e4e37ea 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll @@ -9,8 +9,8 @@ define void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %R ; CHECK-NEXT: [[TMP0:%.*]] = add i64 256, 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP20:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ zeroinitializer, [[ENTRY]] ], [ [[TMP19:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX:%.*]], %struct.complex* [[A:%.*]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[B:%.*]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP4]], align 4 @@ -19,23 +19,23 @@ define void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %R ; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP3]] to <2 x float>* ; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <2 x i32> 
zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP9]], [[SHUFFLE1]] -; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x float> [[TMP13]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x float> [[TMP11]], [[SHUFFLE2]] -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x i32> -; CHECK-NEXT: [[TMP17]] = fadd <2 x float> [[TMP2]], [[TMP16]] -; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[TMP0]] -; CHECK-NEXT: br i1 [[TMP19]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x float> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x float> [[TMP9]], [[TMP14]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP15]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = fsub <2 x float> [[TMP12]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x float> [[TMP12]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> [[TMP17]], <2 x i32> +; CHECK-NEXT: [[TMP19]] = fadd <2 x float> [[TMP2]], [[TMP18]] +; CHECK-NEXT: [[TMP20]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP20]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP21]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[RESULT:%.*]], i32 0, i32 0 -; CHECK-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP17]], <2 x float>* [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = 
getelementptr inbounds [[STRUCT_COMPLEX]], %struct.complex* [[RESULT:%.*]], i32 0, i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[TMP23]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll index 1b31f8a3a98d8..aae0a078692f5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll @@ -13,18 +13,18 @@ define void @rotate_with_external_users(double *%A, double *%ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], ; CHECK-NEXT: [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRA1]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PTRA1]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 -; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double 
[[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP7]], [[TMP6]] ; CHECK-NEXT: ret void ; bb1: @@ -117,22 +117,22 @@ define void @addsub_and_external_users(double *%A, double *%ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], ; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[SHUFFLE1]], <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> 
[[SHUFFLE]], <2 x double>* [[TMP7]], align 8 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP6]], +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP9]], [[TMP10]] ; CHECK-NEXT: ret void ; bb1: @@ -167,21 +167,21 @@ define void @subadd_and_external_users(double *%A, double *%ptr) { ; CHECK-NEXT: bb1: ; CHECK-NEXT: [[LD:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x double> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], ; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> 
[[TMP5]], <2 x double>* [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[PTRA0]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP5]], -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 -; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP6]], +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1 +; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[TMP10]], [[TMP9]] ; CHECK-NEXT: ret void ; bb1: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll index ce6a477f30a08..8e26c28219788 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll @@ -6,12 +6,12 @@ define i32 @main(i32 %0) { ; CHECK-NEXT: for.cond.preheader: ; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]] ; CHECK: for.inc.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> , i32 [[TMP0:%.*]], i32 6 ; CHECK-NEXT: br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: br label [[L1_PREHEADER]] ; CHECK: L1.preheader: 
; CHECK-NEXT: [[TMP3:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll index 7438f4e6a5cdc..b7d6827e25123 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -13,16 +13,16 @@ define void @test(i1 %c, ptr %arg) { ; CHECK: else: ; CHECK-NEXT: [[ARG_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 8 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[ARG]], i32 0 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2 -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> , <4 x i64> poison) +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[ARG]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> [[TMP9]], ptr [[ARG_1]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP10]], 
i32 8, <4 x i1> , <4 x i64> poison) ; CHECK-NEXT: br label [[JOIN]] ; CHECK: join: -; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP10]], [[ELSE]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i64> [ [[TMP3]], [[IF]] ], [ [[TMP11]], [[ELSE]] ] ; CHECK-NEXT: ret void ; br i1 %c, label %if, label %else diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll index 9f5b8ca0e8d1e..28759385d5539 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll @@ -8,7 +8,7 @@ define void @foo() { ; CHECK-NEXT: [[SUB:%.*]] = fsub float 6.553500e+04, undef ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> , float [[SUB]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float [[SUB]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1 ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: From 6ebca0302126c43ec8614d26aa444060e7a6da76 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 7 Nov 2022 17:40:30 +0100 Subject: [PATCH 433/516] [Clang] Update test after wasm intrinsics attribute change (NFC) I missed this test in d35fcf0e97e7bb02381506a71e61ec282b292c50. 
--- clang/test/CodeGenCXX/wasm-eh.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGenCXX/wasm-eh.cpp b/clang/test/CodeGenCXX/wasm-eh.cpp index e965768bf834f..27752f5f58036 100644 --- a/clang/test/CodeGenCXX/wasm-eh.cpp +++ b/clang/test/CodeGenCXX/wasm-eh.cpp @@ -34,7 +34,7 @@ void test0() { // CHECK-NEXT: %[[EXN:.*]] = call ptr @llvm.wasm.get.exception(token %[[CATCHPAD]]) // CHECK-NEXT: store ptr %[[EXN]], ptr %exn.slot // CHECK-NEXT: %[[SELECTOR:.*]] = call i32 @llvm.wasm.get.ehselector(token %[[CATCHPAD]]) -// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #2 +// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #8 // CHECK-NEXT: %[[MATCHES:.*]] = icmp eq i32 %[[SELECTOR]], %[[TYPEID]] // CHECK-NEXT: br i1 %[[MATCHES]], label %[[CATCH_INT_BB:.*]], label %[[CATCH_FALLTHROUGH_BB:.*]] @@ -51,7 +51,7 @@ void test0() { // CHECK-NEXT: br label %[[TRY_CONT_BB:.*]] // CHECK: [[CATCH_FALLTHROUGH_BB]] -// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTId) #2 +// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTId) #8 // CHECK-NEXT: %[[MATCHES:.*]] = icmp eq i32 %[[SELECTOR]], %[[TYPEID]] // CHECK-NEXT: br i1 %[[MATCHES]], label %[[CATCH_FLOAT_BB:.*]], label %[[RETHROW_BB:.*]] From 708185f03ff480b3481132802b7b63461564f0ab Mon Sep 17 00:00:00 2001 From: Christopher Bate Date: Sat, 5 Nov 2022 09:43:24 -0600 Subject: [PATCH 434/516] [mlir][NVGPU] Add support for structured sparsity MMA variants This change adds a new NVGPU operation that targets the PTX `mma.sp.sync` instruction variants. A lowering to NVVM is provided using inline assembly. 
Reviewed By: ThomasRaoux, manishucsd Differential Revision: https://reviews.llvm.org/D137202 --- mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td | 78 +++++- .../Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp | 249 +++++++++++++++--- mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp | 108 +++++--- .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 116 ++++++++ mlir/test/Dialect/NVGPU/roundtrip.mlir | 38 +++ 5 files changed, 511 insertions(+), 78 deletions(-) diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td index 138ffc896cb2a..db4ee53252fb3 100644 --- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td +++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td @@ -98,10 +98,24 @@ def NVGPU_LdMatrixOp : NVGPU_Op<"ldmatrix", [ let hasVerifier = 1; } -def NVGPU_MmaSyncOp : NVGPU_Op<"mma.sync", [ - Pure, - PredOpTrait<"matrixA and matrixB have same element type", - TCopVTEtIsSameAs<0, 1>>]> { +class NVGPU_MmaSyncOp : + NVGPU_Op>]> { + code extraBaseClassDeclaration = [{ + std::array getMmaShapeAsArray() { + ArrayAttr mmaShape = this->getMmaShape(); + assert(mmaShape.size() == 3 && "mmaShape should be three integers"); + return {mmaShape[0].cast().getInt(), + mmaShape[1].cast().getInt(), + mmaShape[2].cast().getInt()}; + } + }]; + + let hasVerifier = 1; +} + +def NVGPU_MmaSyncOp : NVGPU_MmaSyncOp<"mma.sync"> { let description = [{ The `nvgpu.mma.sync` op represents the warp-level matrix-multiply-and- accumulate (mma) operation that is compatible with `nvvm.mma.sync`. 
@@ -143,9 +157,63 @@ def NVGPU_MmaSyncOp : NVGPU_Op<"mma.sync", [ `:` `(` type($matrixA) `,` type($matrixB) `,` type($matrixC) `)` `->` type($res) }]; - let hasVerifier = 1; + let extraClassDeclaration = extraBaseClassDeclaration; } +def NVGPU_MmaSparseSyncMetadataType : FixedVectorOfLengthAndType<[2], [I16]>, + BuildableType<"::mlir::VectorType::get(" + "{2},$_builder.getI16Type())">; + +def NVGPU_MmaSparseSyncOp : NVGPU_MmaSyncOp<"mma.sp.sync"> { + let description = [{ + The `nvgpu.mma.sp.sync` operation performs a warp-distributed MMA operation + where operand A is "structured sparse". In this case, the `matrixA` operand + represents the (warp-distributed) non-zero values of operand A, and the + `sparseMetadata` operand provides the indices. + + The full description of the sparsity storage format and distribution scheme is + described in the PTX docs. This operation is meant to follow the semantics + described in the PTX documentation here: + https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-sparse-mma + + The way the indices are distributed among the threads in a warp is controlled + by the optional `sparsitySelector` attribute, which is `0` by default. For + more information, please consult the PTX documentation linked above. 
+ + Example (targeting the f16 16x8x32 `mma.sp` PTX instruction): + + ```mlir + nvgpu.mma.sp.sync (%a, %b, %c) metadata (%meta) {mmaShape = [16, 8, 32]} : + (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + ``` + }]; + + let arguments = (ins AnyVector:$matrixA, + AnyVector:$matrixB, + AnyVector:$matrixC, + NVGPU_MmaSparseSyncMetadataType:$sparseMetadata, + I64ArrayAttr:$mmaShape, + DefaultValuedAttr:$sparsitySelector, + OptionalAttr:$tf32Enabled + ); + + let results = (outs AnyVector:$res); + + let builders = [ + OpBuilder<(ins "Value":$matrixA, + "Value":$matrixB, + "Value":$matrixC, + "Value":$sparseMetadata, + "ArrayRef":$mmaShape)> + ]; + + let assemblyFormat = [{ + `(` $matrixA`,` $matrixB`,` $matrixC `)` `metadata` `(` $sparseMetadata `)` attr-dict + `:` `(` type($matrixA) `,` type($matrixB) `,` type($matrixC) `)` `->` type($res) + }]; + + let extraClassDeclaration = extraBaseClassDeclaration; +} def NVGPU_DeviceAsyncCopyOp : NVGPU_Op<"device_async_copy", [ AttrSizedOperandSegments]> { diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index c4c49f2edd5ff..d9f54b8cb55d7 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -11,8 +11,10 @@ #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Pass/Pass.h" namespace mlir { @@ -253,6 +255,23 @@ struct MmaLdMatrixOpToNVVM : public ConvertOpToLLVMPattern { } }; +/// Convert the given type into the corresponding PTX type (NVVM::MMATypes +/// enum). 
+static FailureOr getNvvmMmaType(Type t) { + Type elType = getElementTypeOrSelf(t); + if (elType.isInteger(8)) + return NVVM::MMATypes::s8; + if (elType.isInteger(4)) + return NVVM::MMATypes::s4; + if (elType.isF16()) + return NVVM::MMATypes::f16; + if (elType.isF64()) + return NVVM::MMATypes::f64; + if (elType.isF32()) + return NVVM::MMATypes::tf32; + return failure(); +} + struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; @@ -262,53 +281,38 @@ struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern { Location loc = op->getLoc(); // Get the shapes of the MMAMatrix type being used. The shapes will // choose which intrinsic this op will be lowered to. - auto aType = op.getMatrixA().getType().cast(); - auto cType = op.getMatrixC().getType().cast(); + VectorType aType = op.getMatrixA().getType(); + VectorType bType = op.getMatrixA().getType(); + VectorType cType = op.getMatrixC().getType(); - int64_t m = op.getMmaShape()[0].cast().getInt(); - int64_t n = op.getMmaShape()[1].cast().getInt(); - int64_t k = op.getMmaShape()[2].cast().getInt(); - std::array gemmShape{m, n, k}; + std::array gemmShape = op.getMmaShapeAsArray(); + + // Tensor Cores (mma.sync) on F32 works only with TensorFloat32 (TF32). 
+ bool tf32Enabled = op->hasAttr(op.getTf32EnabledAttrName()); + if (aType.getElementType().isF32() && !tf32Enabled) + return failure(); - NVVM::MMATypes ptxTypeA; - NVVM::MMATypes ptxTypeB; + FailureOr ptxTypeA = getNvvmMmaType(aType); + if (failed(ptxTypeA)) + return op->emitOpError("failed to deduce operand PTX types"); + FailureOr ptxTypeB = getNvvmMmaType(bType); + if (failed(ptxTypeB)) + return op->emitOpError("failed to deduce operand PTX types"); Optional ptxTypeC = NVVM::MmaOp::inferOperandMMAType( cType.getElementType(), /*isAccumulator=*/true); if (!ptxTypeC) return op->emitError( "could not infer the PTX type for the accumulator/result"); - // Tensor Cores (mma.sync) on F32 works only with TensorFloat32 (TF32). - bool tf32Enabled = op->hasAttr(op.getTf32EnabledAttrName()); - if (aType.getElementType().isF32() && !tf32Enabled) - return failure(); - + // TODO: add an attribute to the op to customize this behavior. Optional overflow(llvm::None); - if (aType.getElementType().isInteger(8)) { - ptxTypeA = NVVM::MMATypes::s8; - ptxTypeB = NVVM::MMATypes::s8; + if (aType.getElementType().isa()) overflow = NVVM::MMAIntOverflow::satfinite; - } else if (aType.getElementType().isInteger(4)) { - ptxTypeA = NVVM::MMATypes::s4; - ptxTypeB = NVVM::MMATypes::s4; - overflow = NVVM::MMAIntOverflow::satfinite; - } else if (aType.getElementType().isF16()) { - ptxTypeA = NVVM::MMATypes::f16; - ptxTypeB = NVVM::MMATypes::f16; - } else if (aType.getElementType().isF64()) { - ptxTypeA = NVVM::MMATypes::f64; - ptxTypeB = NVVM::MMATypes::f64; - } else if (aType.getElementType().isF32()) { - ptxTypeA = NVVM::MMATypes::tf32; - ptxTypeB = NVVM::MMATypes::tf32; - } else { - return op->emitError("could not deduce operand PTX types"); - } SmallVector matA = - unpackOperandVector(rewriter, loc, adaptor.getMatrixA(), ptxTypeA); + unpackOperandVector(rewriter, loc, adaptor.getMatrixA(), *ptxTypeA); SmallVector matB = - unpackOperandVector(rewriter, loc, adaptor.getMatrixB(), ptxTypeB); + 
unpackOperandVector(rewriter, loc, adaptor.getMatrixB(), *ptxTypeB); SmallVector matC = unpackOperandVector(rewriter, loc, adaptor.getMatrixC(), *ptxTypeC); @@ -321,7 +325,7 @@ struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern { /*b1Op=*/llvm::None, /*intOverflow=*/overflow, /*multiplicandPtxTypes=*/ - std::array{ptxTypeA, ptxTypeB}, + std::array{*ptxTypeA, *ptxTypeB}, /*multiplicandLayouts=*/ std::array{NVVM::MMALayout::row, NVVM::MMALayout::col}); @@ -376,13 +380,182 @@ static void emitCpAsyncOpZfillAsm(Location loc, Value dstPtr, Value srcPtr, SmallVector asmVals{dstPtr, srcPtr, dstBytes, srcBytes}; rewriter.create( - loc, LLVM::LLVMVoidType::get(rewriter.getContext()), /*operands=*/asmVals, + loc, LLVM::LLVMVoidType::get(rewriter.getContext()), + /*operands=*/asmVals, /*asm_string=*/asmStr, /*constraints=*/asmConstraints, /*has_side_effects=*/true, /*is_align_stack=*/false, /*asm_dialect=*/asmDialectAttr, /*operand_attrs=*/ArrayAttr()); } +/// Returns the constraints for the sparse MMA inline assembly instruction. +static std::string buildMmaSparseAsmConstraintString(unsigned matASize, + unsigned matBSize, + unsigned matCSize) { + std::string str; + llvm::raw_string_ostream ss(str); + for (unsigned i = 0; i < matCSize; i++) + ss << "=r,"; + for (unsigned i = 0; i < matASize + matBSize + matCSize; i++) + ss << "r,"; + // The final two operands are for the sparsity metadata and sparsity selector. + ss << "r,r"; + ss.flush(); + return str; +} + +/// Returns the string for the `mma.sp.sync` instruction that corresponds to +/// the given parameters. Note that this function doesn't do any validation, +/// it's expected that the provided parameters correspond to a valid +/// instruction. 
+static std::string +buildMmaSparseAsmString(const std::array &shape, unsigned matASize, + unsigned matBSize, unsigned matCSize, + NVVM::MMATypes ptxTypeA, NVVM::MMATypes ptxTypeB, + NVVM::MMATypes ptxTypeC, NVVM::MMATypes ptxTypeD, + Optional overflow) { + auto ptxTypeStr = [](NVVM::MMATypes ptxType) { + return NVVM::stringifyMMATypes(ptxType); + }; + + std::string asmStr; + llvm::raw_string_ostream ss(asmStr); + ss << "mma.sp.sync.aligned.m" << shape[0] << "n" << shape[1] << "k" + << shape[2] << ".row.col."; + + if (overflow) + ss << NVVM::stringifyMMAIntOverflow(*overflow) << "."; + + ss << ptxTypeStr(ptxTypeD) << "." << ptxTypeStr(ptxTypeA) << "." + << ptxTypeStr(ptxTypeB) << "." << ptxTypeStr(ptxTypeC) << " "; + unsigned asmArgIdx = 0; + + // The operand string is structured into sections `{matC elements...}, + // {matA elements...}, {matB elements...}, {matC elements}`. + for (const auto arrSize : {matCSize, matASize, matBSize, matCSize}) { + ss << "{"; + for (unsigned i = 0; i < arrSize; i++) + ss << "$" << asmArgIdx++ << (i < arrSize - 1 ? "," : ""); + ss << "},"; + } + ss << "$" << asmArgIdx++ << ",$" << asmArgIdx++ << ";"; + ss.flush(); + return asmStr; +} + +/// Builds an inline assembly operation corresponding to the specified MMA +/// sparse sync operation. 
+static FailureOr emitMmaSparseSyncOpAsm( + Location loc, NVVM::MMATypes ptxTypeA, NVVM::MMATypes ptxTypeB, + NVVM::MMATypes ptxTypeC, NVVM::MMATypes ptxTypeD, + Optional overflow, ArrayRef unpackedAData, + ArrayRef unpackedB, ArrayRef unpackedC, Value indexData, + int64_t metadataSelector, const std::array &shape, + Type intrinsicResultType, ConversionPatternRewriter &rewriter) { + auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(), + LLVM::AsmDialect::AD_ATT); + + std::string asmStr = buildMmaSparseAsmString( + shape, unpackedAData.size(), unpackedB.size(), unpackedC.size(), ptxTypeA, + ptxTypeB, ptxTypeC, ptxTypeD, overflow); + std::string constraintStr = buildMmaSparseAsmConstraintString( + unpackedAData.size(), unpackedB.size(), unpackedC.size()); + + Value selectorVal = rewriter.create( + loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(metadataSelector)); + + SmallVector asmVals; + asmVals.reserve(unpackedAData.size() + unpackedB.size() + unpackedC.size() + + 2); + for (ArrayRef args : {unpackedAData, unpackedB, unpackedC}) + llvm::append_range(asmVals, args); + asmVals.push_back(indexData); + asmVals.push_back(selectorVal); + + return rewriter.create(loc, + /*resultTypes=*/intrinsicResultType, + /*operands=*/asmVals, + /*asm_string=*/asmStr, + /*constraints=*/constraintStr, + /*has_side_effects=*/true, + /*is_align_stack=*/false, + /*asm_dialect=*/asmDialectAttr, + /*operand_attrs=*/ArrayAttr()); +} + +/// Lowers `nvgpu.mma.sp.sync` to inline assembly. +struct NVGPUMmaSparseSyncLowering + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(nvgpu::MmaSparseSyncOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); + // Get the shapes of the MMAMatrix type being used. The shapes will + // choose which intrinsic this op will be lowered to. 
+ VectorType aType = op.getMatrixA().getType(); + VectorType bType = op.getMatrixB().getType(); + VectorType cType = op.getMatrixC().getType(); + + FailureOr ptxTypeA = getNvvmMmaType(aType); + if (failed(ptxTypeA)) + return op->emitOpError("failed to deduce operand PTX types"); + FailureOr ptxTypeB = getNvvmMmaType(bType); + if (failed(ptxTypeB)) + return op->emitOpError("failed to deduce operand PTX types"); + Optional ptxTypeC = NVVM::MmaOp::inferOperandMMAType( + cType.getElementType(), /*isAccumulator=*/true); + if (!ptxTypeC) + return op->emitError( + "could not infer the PTX type for the accumulator/result"); + + // Same as `mma.sync`, F32 works only with TensorFloat32 (TF32). + bool tf32Enabled = op->hasAttr(op.getTf32EnabledAttrName()); + if (aType.getElementType().isF32() && !tf32Enabled) + return failure(); + + // TODO: add an attribute to the op to customize this behavior. + Optional overflow(llvm::None); + if (aType.getElementType().isa()) + overflow = NVVM::MMAIntOverflow::satfinite; + + SmallVector matA = + unpackOperandVector(rewriter, loc, adaptor.getMatrixA(), *ptxTypeA); + SmallVector matB = + unpackOperandVector(rewriter, loc, adaptor.getMatrixB(), *ptxTypeB); + SmallVector matC = + unpackOperandVector(rewriter, loc, adaptor.getMatrixC(), *ptxTypeC); + + Type desiredRetTy = typeConverter->convertType(op->getResultTypes()[0]); + Type intrinsicResTy = inferIntrinsicResultType( + typeConverter->convertType(op->getResultTypes()[0])); + + // Bitcast the sparse metadata from vector<2xi16> to an i32. 
+ Value sparseMetadata = adaptor.getSparseMetadata(); + if (sparseMetadata.getType() != + LLVM::getFixedVectorType(rewriter.getI16Type(), 2)) + return op->emitOpError() << "Expected metadata type to be LLVM " + "VectorType of 2 i16 elements"; + sparseMetadata = rewriter.create( + loc, rewriter.getI32Type(), sparseMetadata); + + FailureOr intrinsicResult = emitMmaSparseSyncOpAsm( + loc, *ptxTypeA, *ptxTypeB, *ptxTypeC, *ptxTypeC, overflow, matA, matB, + matC, sparseMetadata, op.getSparsitySelector(), op.getMmaShapeAsArray(), + intrinsicResTy, rewriter); + if (failed(intrinsicResult)) + return failure(); + + assert((*intrinsicResult).getNumResults() == 1 && + "expected inline asm op returns a single LLVM struct type"); + rewriter.replaceOp( + op, convertIntrinsicResult(op.getLoc(), intrinsicResTy, desiredRetTy, + (*intrinsicResult)->getResult(0), rewriter)); + return success(); + } +}; + struct NVGPUAsyncCopyLowering : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern< @@ -488,8 +661,8 @@ struct NVGPUAsyncWaitLowering void mlir::populateNVGPUToNVVMConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns) { patterns.add( - converter); + NVGPUAsyncCreateGroupLowering, NVGPUAsyncWaitLowering, + NVGPUMmaSparseSyncLowering>(converter); } std::unique_ptr mlir::createConvertNVGPUToNVVMPass() { diff --git a/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp b/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp index 9ed04b45aa1c8..24f70cb986e23 100644 --- a/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp +++ b/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp @@ -13,9 +13,11 @@ #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -80,13 +82,21 @@ void 
MmaSyncOp::build(::mlir::OpBuilder &odsBuilder, mmaShape, UnitAttr()); } -LogicalResult MmaSyncOp::verify() { - - // Fundamental tensor core mma.sync op - // For F32 (TF32), F16, S8, and S4 data types fundamental tensor core - // operation is of shape: 8-by-8-by-128b. F64 is an exception. The - // verification for mma.sync covering various shapes and data types is based - // on the fundamental tensor core operionation. +/// Performs verification for MmaSyncOp and MmaSparseSyncOp. +static LogicalResult verifyMmaSyncOp(Operation *op, + TypedValue matrixA, + TypedValue matrixB, + TypedValue matrixC, + const std::array &mmaShape, + bool tf32Enabled, bool sparse = false) { + + // The verification for mma.sync covering various shapes and data types is + // based on the fundamental tensor core shape. + + // "Fundamental" tensor core shapes: + // - For F32 (TF32), F16, S8, and S4 data + // types the fundamental tensor core operation is of shape 8-by-8-by-128b. + // - F64 is an exception and is of shape 8-by-8-by-256b. 
constexpr int kThreads = 32; // 32 threads per warp int64_t shapeM = 8; int64_t shapeN = 8; @@ -98,9 +108,9 @@ LogicalResult MmaSyncOp::verify() { int64_t numElementC{2}; // two accumulator elements per fundamental tile // nvgpu.mma.sync vector operands (per thread) - auto aVector = getMatrixA().getType().cast(); - auto bVector = getMatrixB().getType().cast(); - auto cVector = getMatrixC().getType().cast(); + auto aVector = matrixA.getType(); + auto bVector = matrixB.getType(); + auto cVector = matrixC.getType(); // vector shapes ArrayRef aShape = aVector.getShape(); @@ -110,13 +120,9 @@ LogicalResult MmaSyncOp::verify() { // vector element type Type aType = aVector.getElementType(); - // tensor float32 (TF32) enabled - bool tf32Enabled = getOperation()->hasAttr(getTf32EnabledAttrName()); - - // nvgpu.mma.sync shape (per 32 threads or per warp) - int64_t m = getMmaShape()[0].cast().getInt(); - int64_t n = getMmaShape()[1].cast().getInt(); - int64_t k = getMmaShape()[2].cast().getInt(); + // Certain data types are not allowed in sparse mode. 
+ if (sparse && aType.isF64()) + return op->emitError() << "f64 is not supported for sparse mode"; if (aType.isF64()) { // exception to 8-by-8-128b fundamental tensor core tile size @@ -127,36 +133,43 @@ LogicalResult MmaSyncOp::verify() { aType.isInteger(8) || aType.isInteger(4)) { // 8-by-8-128b fundamental tensor core tile size int operandBitwidth = aType.getIntOrFloatBitWidth(); - shapeK = 128 / operandBitwidth; // 128b wide shapeK + shapeK = 128 / operandBitwidth; // 128b wide shapeK + numElementA = 32 / operandBitwidth; // 32b wide operand A numElementB = 32 / operandBitwidth; // 32b wide operand B } else { - return emitError() << "expected input data type (i4,i8,f16,bf16,tf32,f64) " - "supported by nvgpu.mma.sync"; + return op->emitError() + << "expected input data type (i4,i8,f16,bf16,tf32,f64) " + "supported by " + << op->getName(); } // // Basic verification // + auto [m, n, k] = mmaShape; + // verify warp-wide size for vector a - if (aShape[0] * aShape[1] * kThreads != m * k) - return emitOpError() << "expected " << m * k - << " warp-wide matrix A elements"; + int64_t sparseFactor = sparse ? 
2 : 1; + if (aShape[0] * aShape[1] * kThreads != m * k / sparseFactor) + return op->emitOpError() + << "expected " << m * k << " warp-wide matrix A elements"; // verify warp-wide size for vector b if (bShape[0] * bShape[1] * kThreads != k * n) - return emitOpError() << "expected " << k * n - << " warp-wide matrix B elements"; + return op->emitOpError() + << "expected " << k * n << " warp-wide matrix B elements"; // verify warp-wide size for vector c if (cShape[0] * cShape[1] * kThreads != m * n) - return emitOpError() << "expected " << m * n - << " warp-wide matrix C elements"; + return op->emitOpError() + << "expected " << m * n << " warp-wide matrix C elements"; // verify tf32 tensor cores are enabled for only F32 datatype if (tf32Enabled && !(aType.isF32())) - return emitOpError() << "expected tf32 tensor cores only for F32 operands"; + return op->emitOpError() + << "expected tf32 tensor cores only for F32 operands"; // // Extended verification @@ -168,23 +181,48 @@ LogicalResult MmaSyncOp::verify() { int64_t kTile = k / shapeK; // verify shape of aVector - if ((aShape[0] != mTile * kTile) || (aShape[1] != numElementA)) - return emitOpError() << "expected matrix A to be shaped (" << mTile * kTile - << " x " << numElementA << ")"; + if ((aShape[0] != mTile * kTile / (sparse ? 
2 : 1)) || + (aShape[1] != numElementA)) + return op->emitOpError() << "expected matrix A to be shaped (" + << mTile * kTile << " x " << numElementA << ")"; // verify shape of bVector if ((bShape[0] != kTile * nTile) || (bShape[1] != numElementB)) - return emitOpError() << "expected matrix B to be shaped (" << kTile * nTile - << " x " << numElementB << ")"; + return op->emitOpError() << "expected matrix B to be shaped (" + << kTile * nTile << " x " << numElementB << ")"; // verify shape of cVector if ((cShape[0] != mTile * nTile) || (cShape[1] != numElementC)) - return emitOpError() << "expected matrix C to be shaped (" << mTile * nTile - << " x " << numElementC << ")"; + return op->emitOpError() << "expected matrix C to be shaped (" + << mTile * nTile << " x " << numElementC << ")"; return success(); } +LogicalResult MmaSyncOp::verify() { + return verifyMmaSyncOp(this->getOperation(), getMatrixA(), getMatrixB(), + getMatrixC(), getMmaShapeAsArray(), + getOperation()->hasAttr(getTf32EnabledAttrName())); +} + +//===----------------------------------------------------------------------===// +// NVGPU_MmaSparseSyncOp +//===----------------------------------------------------------------------===// +void MmaSparseSyncOp::build(::mlir::OpBuilder &odsBuilder, + ::mlir::OperationState &odsState, Value matrixA, + Value matrixB, Value matrixC, Value sparseMetadata, + ArrayRef mmaShape) { + build(odsBuilder, odsState, matrixC.getType(), matrixA, matrixB, matrixC, + sparseMetadata, odsBuilder.getI64ArrayAttr(mmaShape), 0, UnitAttr()); +} + +LogicalResult MmaSparseSyncOp::verify() { + return verifyMmaSyncOp(this->getOperation(), getMatrixA(), getMatrixB(), + getMatrixC(), getMmaShapeAsArray(), + getOperation()->hasAttr(getTf32EnabledAttrName()), + true); +} + //===----------------------------------------------------------------------===// // NVGPU_LdMatrixOp //===----------------------------------------------------------------------===// diff --git 
a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 0a9f8d5611903..c95b2fca9dffd 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -313,3 +313,119 @@ func.func @async_cp_zfill( return } + +// ----- + +// CHECK-LABEL: func @mma_sp_sync_f16_16832( +func.func @mma_sp_sync_f16_16832(%arg0: vector<4x2xf16>, + %arg1: vector<4x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK-NOT llvm.extractvalue + + // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 + // CHECK: %[[sparseSelector:.+]] = llvm.mlir.constant(0 : i32) : i32 + + // CHECK: %[[d:.+]] = llvm.inline_asm has_side_effects asm_dialect = att + // CHECK-SAME: "mma.sp.sync.aligned.m16n8k32.row.col.f16.f16.f16.f16 {$0,$1},{$2,$3,$4,$5},{$6,$7,$8,$9},{$10,$11},$12,$13;" + // CHECK-SAME: "=r,=r,r,r,r,r,r,r,r,r,r,r,r,r" + // CHECK-SAME: %[[sparseMetadata]], %[[sparseSelector]] : + // CHECK-SAME: -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 32]} : + (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> 
vector<2x2xf16> + + // CHECK-DAG: llvm.extractvalue %[[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)> + // CHECK-DAG: llvm.extractvalue %[[d]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>)> + // CHECK: llvm.mlir.undef : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.insertvalue %{{.+}}, %{{.+}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.insertvalue %{{.+}}, %{{.+}}[1] : !llvm.array<2 x vector<2xf16>> + return %d : vector<2x2xf16> +} + +// ----- + +// CHECK-LABEL: func @mma_sp_sync_f16_16816( +func.func @mma_sp_sync_f16_16816(%arg0: vector<2x2xf16>, + %arg1: vector<2x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK-NOT llvm.extractvalue + + // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 + // CHECK: %[[sparseSelector:.+]] = llvm.mlir.constant(0 : i32) : i32 + + // CHECK: %[[d:.+]] = llvm.inline_asm has_side_effects asm_dialect = att + // CHECK-SAME: "mma.sp.sync.aligned.m16n8k16.row.col.f16.f16.f16.f16 {$0,$1},{$2,$3},{$4,$5},{$6,$7},$8,$9;" + // CHECK-SAME: "=r,=r,r,r,r,r,r,r,r,r" + // CHECK-SAME: %[[sparseMetadata]], %[[sparseSelector]] : + // CHECK-SAME: -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 16]} : + (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + return %d : vector<2x2xf16> +} + +// ----- + +// CHECK-LABEL: func @mma_sp_sync_i8_16864( +func.func @mma_sp_sync_i8_16864(%arg0: vector<4x4xi8>, + %arg1: 
vector<4x4xi8>, + %arg2: vector<2x2xi32>, + %arg3: vector<2xi16>) -> vector<2x2xi32> { + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vector<4xi8>> + + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + + // CHECK: llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + // CHECK: llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + + // CHECK-NOT llvm.extractvalue + + // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 + // CHECK: %[[sparseSelector:.+]] = llvm.mlir.constant(0 : i32) : i32 + + // CHECK: %[[d:.+]] = llvm.inline_asm has_side_effects asm_dialect = att + // CHECK-SAME: "mma.sp.sync.aligned.m16n8k64.row.col.satfinite.s32.s8.s8.s32 + // CHECK-SAME: "=r,=r,=r,=r,r,r,r,r,r,r,r,r,r,r,r,r,r,r" + // CHECK-SAME: %[[sparseMetadata]], %[[sparseSelector]] : + // CHECK-SAME: -> !llvm.struct<(i32, i32, i32, i32) + + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 64]} : + (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> + return %d : vector<2x2xi32> +} diff --git a/mlir/test/Dialect/NVGPU/roundtrip.mlir b/mlir/test/Dialect/NVGPU/roundtrip.mlir index 524f1fd6907b7..ad516b4d2c200 100644 --- a/mlir/test/Dialect/NVGPU/roundtrip.mlir +++ b/mlir/test/Dialect/NVGPU/roundtrip.mlir @@ -19,6 +19,44 @@ func.func @mma_sync(%arg0: vector<4x2xf16>, return %d : vector<2x2xf16> } 
+// CHECK-LABEL: func @mma_sp_sync_f16_16832( +func.func @mma_sp_sync_f16_16832(%arg0: vector<4x2xf16>, + %arg1: vector<4x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + // CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) { + // CHECK-SAME: mmaShape = [16, 8, 32] + // CHECK-SAME: (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 32]} : + (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + return %d : vector<2x2xf16> +} + +// CHECK-LABEL: func @mma_sp_sync_f16_16816( +func.func @mma_sp_sync_f16_16816(%arg0: vector<2x2xf16>, + %arg1: vector<2x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + // CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) { + // CHECK-SAME: mmaShape = [16, 8, 16] + // CHECK-SAME: (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 16]} : + (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + return %d : vector<2x2xf16> +} + +// CHECK-LABEL: func @mma_sp_sync_i8_16864( +func.func @mma_sp_sync_i8_16864(%arg0: vector<4x4xi8>, + %arg1: vector<4x4xi8>, + %arg2: vector<2x2xi32>, + %arg3: vector<2xi16>) -> vector<2x2xi32> { + // CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) { + // CHECK-SAME: mmaShape = [16, 8, 64] + // CHECK-SAME: (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 64]} : + (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> + return %d : vector<2x2xi32> +} func.func @async_cp(%dst : memref<2x7x5xf32, 3>, %src : memref<4x5xf32>){ // CHECK-LABEL: func @async_cp From 1ce5f93d03c24ad780f099eccc2dc768e9e9e30f Mon Sep 17 00:00:00 2001 From: Matt Arsenault 
Date: Sun, 16 Oct 2022 22:26:13 -0700 Subject: [PATCH 435/516] InstSimplify: Add new baseline tests for fdiv --- llvm/test/Transforms/InstSimplify/fdiv.ll | 99 +++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/fdiv.ll b/llvm/test/Transforms/InstSimplify/fdiv.ll index 2b998b3e73234..2b583a04818fa 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv.ll @@ -59,3 +59,102 @@ define <2 x i1> @pr6096() { %fcmp = fcmp ole <2 x float> %fdiv, zeroinitializer ret <2 x i1> %fcmp } + + +; https://alive2.llvm.org/ce/z/JxX5in +define float @fdiv_nnan_ninf_by_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan ninf float %x, 0.0 + ret float %fdiv +} + +define float @fdiv_nnan_ninf_by_negzero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_negzero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf float [[X:%.*]], -0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan ninf float %x, -0.0 + ret float %fdiv +} + +define float @fdiv_nnan_ninf_by_undef_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_undef_f32( +; CHECK-NEXT: ret float poison +; + %fdiv = fdiv nnan ninf float %x, undef + ret float %fdiv +} + +define float @fdiv_nnan_ninf_by_poison_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_poison_f32( +; CHECK-NEXT: ret float poison +; + %fdiv = fdiv nnan ninf float %x, poison + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_ninf_by_zero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf <2 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan ninf <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define <2 x float> @fdiv_nnan_ninf_by_undef_v2f32(<2 x float> %x) { +; CHECK-LABEL: 
@fdiv_nnan_ninf_by_undef_v2f32( +; CHECK-NEXT: ret <2 x float> poison +; + %fdiv = fdiv nnan ninf <2 x float> %x, undef + ret <2 x float> %fdiv +} + +define <2 x float> @fdiv_nnan_ninf_by_zero_undef_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_undef_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf <2 x float> [[X:%.*]], +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan ninf <2 x float> %x, + ret <2 x float> %fdiv +} + +; https://alive2.llvm.org/ce/z/wRV28p +define float @fdiv_nnan_nsz_ninf_by_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan nsz float %x, 0.0 + ret float %fdiv +} + +define float @fdiv_nnan_nsz_ninf_by_negzero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_negzero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz float [[X:%.*]], -0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan nsz float %x, -0.0 + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_nsz_ninf_by_zero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_zero_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define <2 x float> @fdiv_nnan_nsz_ninf_by_negzero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_negzero_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan nsz <2 x float> %x, + ret <2 x float> %fdiv +} From 7dd27a75a261f57c6d038eb5faa421c49b12f1d5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 16 Oct 2022 22:27:36 -0700 Subject: [PATCH 436/516] InstSimplify: Fold fdiv nnan ninf x, 0 -> poison https://alive2.llvm.org/ce/z/JxX5in --- llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++++ 
llvm/test/Transforms/InstSimplify/fdiv.ll | 12 ++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 98db224e9e809..c8b796a34c6d7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5461,6 +5461,10 @@ simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (match(Op0, m_FNegNSZ(m_Specific(Op1))) || match(Op1, m_FNegNSZ(m_Specific(Op0)))) return ConstantFP::get(Op0->getType(), -1.0); + + // nnan ninf X / [-]0.0 -> poison + if (FMF.noInfs() && match(Op1, m_AnyZeroFP())) + return PoisonValue::get(Op1->getType()); } return nullptr; diff --git a/llvm/test/Transforms/InstSimplify/fdiv.ll b/llvm/test/Transforms/InstSimplify/fdiv.ll index 2b583a04818fa..38e31257e185a 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv.ll @@ -64,8 +64,7 @@ define <2 x i1> @pr6096() { ; https://alive2.llvm.org/ce/z/JxX5in define float @fdiv_nnan_ninf_by_zero_f32(float %x) { ; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf float [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: ret float [[FDIV]] +; CHECK-NEXT: ret float poison ; %fdiv = fdiv nnan ninf float %x, 0.0 ret float %fdiv @@ -73,8 +72,7 @@ define float @fdiv_nnan_ninf_by_zero_f32(float %x) { define float @fdiv_nnan_ninf_by_negzero_f32(float %x) { ; CHECK-LABEL: @fdiv_nnan_ninf_by_negzero_f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf float [[X:%.*]], -0.000000e+00 -; CHECK-NEXT: ret float [[FDIV]] +; CHECK-NEXT: ret float poison ; %fdiv = fdiv nnan ninf float %x, -0.0 ret float %fdiv @@ -98,8 +96,7 @@ define float @fdiv_nnan_ninf_by_poison_f32(float %x) { define <2 x float> @fdiv_nnan_ninf_by_zero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_v2f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf <2 x float> [[X:%.*]], zeroinitializer -; CHECK-NEXT: ret <2 x float> 
[[FDIV]] +; CHECK-NEXT: ret <2 x float> poison ; %fdiv = fdiv nnan ninf <2 x float> %x, zeroinitializer ret <2 x float> %fdiv @@ -115,8 +112,7 @@ define <2 x float> @fdiv_nnan_ninf_by_undef_v2f32(<2 x float> %x) { define <2 x float> @fdiv_nnan_ninf_by_zero_undef_v2f32(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_undef_v2f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan ninf <2 x float> [[X:%.*]], -; CHECK-NEXT: ret <2 x float> [[FDIV]] +; CHECK-NEXT: ret <2 x float> poison ; %fdiv = fdiv nnan ninf <2 x float> %x, ret <2 x float> %fdiv From a2c4ca50caf43a3924a37580451ebe9fa3daa128 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Mon, 7 Nov 2022 08:48:52 -0800 Subject: [PATCH 437/516] Revert "[mlir][sparse] support Parallel for/reduction." This reverts commit 838389780e56f1a198a94f66ea436359466bf5ed. This broke the windows mlir buildbot: https://lab.llvm.org/buildbot/#/builders/13/builds/27934 --- .../SparseTensor/Transforms/CodegenUtils.cpp | 146 +++++------------- .../SparseTensor/Transforms/CodegenUtils.h | 32 +--- .../Transforms/Sparsification.cpp | 133 +++++++++------- .../Dialect/SparseTensor/sparse_parallel.mlir | 20 +-- .../SparseTensor/sparse_parallel_reduce.mlir | 63 -------- .../SparseTensor/CPU/sparse_matmul.mlir | 8 - .../SparseTensor/CPU/sparse_matvec.mlir | 10 -- 7 files changed, 127 insertions(+), 285 deletions(-) delete mode 100644 mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 27b7acbd322dc..032d8026b2668 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -219,12 +219,9 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( OpBuilder &builder, Location loc, size_t tid, size_t dim, MutableArrayRef reduc, bool isParallel, ArrayRef extraTids, ArrayRef extraDims) { - assert(dimTypes[tid].size() > 
dim); // We can not re-enter the same level. assert(!coord[tid][dim]); - // TODO: support multiple return on parallel for? - assert(!isParallel || reduc.empty() <= 1); Value step = constantIndex(builder, loc, 1); auto dimType = dimTypes[tid][dim]; @@ -235,38 +232,11 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( Value lo = isSparseInput ? pidxs[tid][dim] // current offset : loopSeqStack.back(); // univeral tid Value hi = highs[tid][dim]; - Operation *loop = nullptr; - Value iv; - if (isParallel) { - scf::ParallelOp parOp = - builder.create(loc, lo, hi, step, reduc); - builder.setInsertionPointToStart(parOp.getBody()); - assert(parOp.getNumReductions() == reduc.size()); - iv = parOp.getInductionVars()[0]; - - // In-place update on the reduction variable vector. - // Note that the init vals is not the actual reduction variables but instead - // used as a `special handle` to (temporarily) represent them. The - // expression on init vals will be moved into scf.reduce and replaced with - // the block arguments when exiting the loop (see exitForLoop). This is - // needed as we can not build the actual reduction block and get the actual - // reduction varaible before users fill parallel loop body. - for (int i = 0, e = reduc.size(); i < e; i++) - reduc[i] = parOp.getInitVals()[i]; - loop = parOp; - } else { - scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); - builder.setInsertionPointToStart(forOp.getBody()); - iv = forOp.getInductionVar(); - - // In-place update on the reduction variable vector. 
- assert(forOp.getNumRegionIterArgs() == reduc.size()); - for (int i = 0, e = reduc.size(); i < e; i++) - reduc[i] = forOp.getRegionIterArg(i); - loop = forOp; - } - assert(loop && iv); + scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); + builder.setInsertionPointToStart(forOp.getBody()); + Value iv = forOp.getInductionVar(); + assert(iv); if (isSparseInput) { pidxs[tid][dim] = iv; // Generating a load on the indices array yields the coordinate. @@ -283,12 +253,16 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( // NOTE: we can also prepares for next dim here in advance // Push the loop into stack - loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), loop, + loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), forOp, coord[tid][dim]); // Emit extra locals. emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims); - return loop; + // In-place update on the reduction variable vector. + assert(forOp.getNumRegionIterArgs() == reduc.size()); + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = forOp.getRegionIterArg(i); + return forOp; } Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims( @@ -460,73 +434,17 @@ void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims( } } -void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, - MutableArrayRef reduc) { +SmallVector +SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc, + ArrayRef reduc) { LoopLevelInfo &loopInfo = loopStack.back(); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; - auto forOp = llvm::dyn_cast(loopInfo.loop); - if (forOp) { - if (!reduc.empty()) { - assert(reduc.size() == forOp.getNumResults()); - rewriter.setInsertionPointToEnd(forOp.getBody()); - rewriter.create(loc, reduc); - } - // Exit the loop. - rewriter.setInsertionPointAfter(forOp); - // In-place update reduction variables. 
- for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++) - reduc[i] = forOp.getResult(i); - } else { - auto parOp = llvm::cast(loopInfo.loop); - if (!reduc.empty()) { - assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1); - Operation *redExp = reduc.front().getDefiningOp(); - // Reduction expression should have no use. - assert(redExp->getUses().empty()); - // This must be a binary operation. - // NOTE: This is users' responsibilty to ensure the operation are - // commutative. - assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1); - - Value redVal = parOp.getInitVals().front(); - Value curVal; - if (redExp->getOperand(0) == redVal) - curVal = redExp->getOperand(1); - else if (redExp->getOperand(1) == redVal) - curVal = redExp->getOperand(0); - // One of the operands must be the init value (which is also the - // previous reduction value). - assert(curVal); - // The reduction expression should be the only user of the reduction val - // inside the parallel for. - unsigned numUsers = 0; - for (Operation *op : redVal.getUsers()) { - if (op->getParentOp() == parOp) - numUsers++; - } - assert(numUsers == 1); - (void)numUsers; // to silence unused variable warning in release build - - rewriter.setInsertionPointAfter(redExp); - auto redOp = rewriter.create(loc, curVal); - // Attach to the reduction op. - Block *redBlock = &redOp.getRegion().getBlocks().front(); - rewriter.setInsertionPointToEnd(redBlock); - Operation *newRed = rewriter.clone(*redExp); - // Replaces arguments of the reduction expression by using the block - // arguments from scf.reduce. - rewriter.updateRootInPlace( - newRed, [&]() { newRed->setOperands(redBlock->getArguments()); }); - // Erases the out-dated reduction expression. - rewriter.eraseOp(redExp); - rewriter.setInsertionPointToEnd(redBlock); - rewriter.create(loc, newRed->getResult(0)); - } - rewriter.setInsertionPointAfter(parOp); - // In-place update reduction variables. 
- for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++) - reduc[i] = parOp.getResult(i); + auto forOp = llvm::cast(loopInfo.loop); + if (!reduc.empty()) { + assert(reduc.size() == forOp.getNumResults()); + builder.setInsertionPointToEnd(forOp.getBody()); + builder.create(loc, reduc); } // Finished iterating a tensor, clean up @@ -540,10 +458,14 @@ void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, if (!isDenseDLT(dimTypes[tid][dim])) highs[tid][dim] = Value(); } + // exit the loop + builder.setInsertionPointAfter(forOp); + return forOp.getResults(); } -void SparseTensorLoopEmitter::exitCoIterationLoop( - OpBuilder &builder, Location loc, MutableArrayRef reduc) { +SmallVector +SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, + ArrayRef reduc) { auto whileOp = llvm::cast(loopStack.back().loop); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; @@ -577,10 +499,10 @@ void SparseTensorLoopEmitter::exitCoIterationLoop( } // Reduction value from users. - for (unsigned i = 0, e = reduc.size(); i < e; i++) { - operands.push_back(reduc[i]); - // In place update reduction variable. - reduc[i] = whileOp->getResult(o++); + SmallVector ret; + for (auto red : reduc) { + operands.push_back(red); + ret.push_back(whileOp->getResult(o++)); } // An (optional) universal index. @@ -595,24 +517,26 @@ void SparseTensorLoopEmitter::exitCoIterationLoop( assert(o == operands.size()); builder.create(loc, operands); builder.setInsertionPointAfter(whileOp); + return ret; } -void SparseTensorLoopEmitter::exitCurrentLoop(RewriterBase &rewriter, - Location loc, - MutableArrayRef reduc) { +SmallVector +SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc, + ArrayRef reduc) { // Clean up the values, it would help use to discover potential bug at a // earlier stage (instead of silently using a wrong value). 
LoopLevelInfo &loopInfo = loopStack.back(); assert(loopInfo.tids.size() == loopInfo.dims.size()); SmallVector red; if (llvm::isa(loopInfo.loop)) { - exitCoIterationLoop(rewriter, loc, reduc); + red = exitCoiterationLoop(builder, loc, reduc); } else { - exitForLoop(rewriter, loc, reduc); + red = exitForLoop(builder, loc, reduc); } assert(loopStack.size() == loopSeqStack.size()); loopStack.pop_back(); + return red; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index a75d3920a4d55..3228eb4c79cb2 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -380,8 +380,8 @@ class SparseTensorLoopEmitter { ArrayRef dims, bool needsUniv, MutableArrayRef reduc = {}, ArrayRef extraTids = {}, ArrayRef extraDims = {}); - void exitCurrentLoop(RewriterBase &rewriter, Location loc, - MutableArrayRef reduc = {}); + SmallVector exitCurrentLoop(OpBuilder &builder, Location loc, + ArrayRef reduc = {}); /// Returns the array of coordinate for all the loop generated till now. void getCoordinateArray(SmallVectorImpl &coords) const { @@ -452,35 +452,17 @@ class SparseTensorLoopEmitter { ArrayRef dims); /// Exits a for loop, returns the reduction results, e.g., - /// For sequential for loops: /// %ret = for () { /// ... - /// %val = addi %args, %c /// yield %val /// } - /// For parallel loops, the following generated code by users: - /// %ret = parallel () init(%args) { - /// ... - /// %val = op %args, %c - /// } - /// will be transformed into - /// %ret = parallel () init(%args) { - /// ... 
- /// scf.reduce(%c) bb0(%0, %1){ - /// %val = op %0, %1 - /// scf.reduce.return %val - /// } - /// } - /// NOTE: only one instruction will be moved into reduce block, transformation - /// will fail if multiple instructions are used to compute the reduction - /// value. - /// Return %ret to user, while %val is provided by users (`reduc`). - void exitForLoop(RewriterBase &rewriter, Location loc, - MutableArrayRef reduc); + /// Return %ret to user, while %val is provided by users (`reduc`) + SmallVector exitForLoop(OpBuilder &builder, Location loc, + ArrayRef reduc); /// Exits a while loop, returns the reduction results. - void exitCoIterationLoop(OpBuilder &builder, Location loc, - MutableArrayRef reduc); + SmallVector exitCoiterationLoop(OpBuilder &builder, Location loc, + ArrayRef reduc); // Whether the loop emitter needs to treat the last tensor as the output // tensor. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 533d31fdb5536..9f01731a34d4c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -410,34 +410,6 @@ static Value getCustomRedId(Operation *op) { // Sparse compiler synthesis methods (statements and expressions). //===----------------------------------------------------------------------===// -/// Generates loop boundary statements (entering/exiting loops). The function -/// passes and updates the reduction value. -static Optional genLoopBoundary( - CodeGen &codegen, Merger &merger, - function_ref(MutableArrayRef reduc)> - callback) { - SmallVector reduc; - if (codegen.redVal) - reduc.push_back(codegen.redVal); - if (codegen.expValues) - reduc.push_back(codegen.expCount); - if (codegen.insChain) - reduc.push_back(codegen.insChain); - - auto r = callback(reduc); - - // Callback should do in-place update on reduction value vector. 
- unsigned i = 0; - if (codegen.redVal) - updateReduc(merger, codegen, reduc[i++]); - if (codegen.expValues) - codegen.expCount = reduc[i++]; - if (codegen.insChain) - codegen.insChain = reduc[i]; - - return r; -} - /// Local bufferization of all dense and sparse data structures. static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder, linalg::GenericOp op) { @@ -897,25 +869,23 @@ static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder, /// Returns parallelization strategy. Any implicit loop in the Linalg /// operation that is marked "parallel" is a candidate. Whether it is actually /// converted to a parallel operation depends on the requested strategy. -static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isSparse) { +static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction, + bool isSparse) { // Reject parallelization of sparse output. if (codegen.sparseOut) return false; - // Parallel loops on tensor expansion can cause data races. - if (codegen.expCount) - return false; // Inspect strategy. 
switch (codegen.options.parallelizationStrategy) { case SparseParallelizationStrategy::kNone: return false; case SparseParallelizationStrategy::kDenseOuterLoop: - return isOuter && !isSparse; + return isOuter && !isSparse && !isReduction; case SparseParallelizationStrategy::kAnyStorageOuterLoop: - return isOuter; + return isOuter && !isReduction; case SparseParallelizationStrategy::kDenseAnyLoop: - return !isSparse; + return !isSparse && !isReduction; case SparseParallelizationStrategy::kAnyStorageAnyLoop: - return true; + return !isReduction; } llvm_unreachable("unexpected parallelization strategy"); } @@ -928,16 +898,33 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef extraDims) { Location loc = op.getLoc(); auto iteratorTypes = op.getIteratorTypesArray(); + bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]); bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) || isSingletonDLT(merger.getDimLevelType(tid, idx)); - bool isParallel = isParallelFor(codegen, isOuter, isSparse); - - Operation *loop = - genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { - return codegen.loopEmitter.enterLoopOverTensorAtDim( - builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims); - }).value(); - assert(loop); + bool isParallel = isParallelFor(codegen, isOuter, isReduction, isSparse); + assert(!isParallel); + + // Emit a sequential for loop. 
+ SmallVector operands; + if (codegen.redVal) + operands.push_back(codegen.redVal); + if (codegen.expValues) + operands.push_back(codegen.expCount); + if (codegen.insChain) + operands.push_back(codegen.insChain); + + Operation *loop = codegen.loopEmitter.enterLoopOverTensorAtDim( + builder, loc, tid, dim, operands, isParallel, extraTids, extraDims); + + unsigned o = 0; + if (codegen.redVal) + updateReduc(merger, codegen, operands[o++]); + if (codegen.expValues) + codegen.expCount = operands[o++]; + if (codegen.insChain) + codegen.insChain = operands[o++]; + assert(o == operands.size()); + return loop; } @@ -947,15 +934,29 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef condTids, ArrayRef condDims, ArrayRef extraTids, ArrayRef extraDims) { + SmallVector operands; + + // Construct the while-loop with a parameter for each index. + if (codegen.redVal) + operands.push_back(codegen.redVal); + if (codegen.expValues) + operands.push_back(codegen.expCount); + if (codegen.insChain) + operands.push_back(codegen.insChain); + + Operation *loop = codegen.loopEmitter.enterCoIterationOverTensorsAtDims( + builder, op.getLoc(), condTids, condDims, needsUniv, operands, extraTids, + extraDims); + + unsigned o = 0; + if (codegen.redVal) + updateReduc(merger, codegen, operands[o++]); + if (codegen.expValues) + codegen.expCount = operands[o++]; + if (codegen.insChain) + codegen.insChain = operands[o++]; + assert(o == operands.size()); - Operation *loop = - genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { - // Construct the while-loop with a parameter for each index. - return codegen.loopEmitter.enterCoIterationOverTensorsAtDims( - builder, op.getLoc(), condTids, condDims, needsUniv, reduc, - extraTids, extraDims); - }).value(); - assert(loop); return loop; } @@ -1185,21 +1186,37 @@ static Operation *startLoop(Merger &merger, CodeGen &codegen, } /// Ends a single loop in current sequence. Returns new values for needsUniv. 
-static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter, +static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder, linalg::GenericOp op, Operation *loop, unsigned idx, unsigned li, bool needsUniv) { // End a while-loop. if (auto whileOp = dyn_cast(loop)) { - finalizeWhileOp(merger, codegen, rewriter, op, idx, needsUniv, + finalizeWhileOp(merger, codegen, builder, op, idx, needsUniv, merger.lat(li).bits, whileOp); } else { needsUniv = false; } - genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { - codegen.loopEmitter.exitCurrentLoop(rewriter, op.getLoc(), reduc); - return llvm::None; - }); + SmallVector reduc; + if (codegen.redVal) + reduc.push_back(codegen.redVal); + if (codegen.expValues) + reduc.push_back(codegen.expCount); + if (codegen.insChain) + reduc.push_back(codegen.insChain); + + auto loopRet = + codegen.loopEmitter.exitCurrentLoop(builder, op.getLoc(), reduc); + assert(reduc.size() == loopRet.size()); + + unsigned o = 0; + if (codegen.redVal) + updateReduc(merger, codegen, loopRet[o++]); + if (codegen.expValues) + codegen.expCount = loopRet[o++]; + if (codegen.insChain) + codegen.insChain = loopRet[o++]; + assert(o == loopRet.size()); return needsUniv; } diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir index f38865c5e2a4f..38766b08ccab8 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -1,13 +1,14 @@ // RUN: mlir-opt %s -sparsification="parallelization-strategy=none" | \ // RUN: FileCheck %s --check-prefix=CHECK-PAR0 -// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \ -// RUN: FileCheck %s --check-prefix=CHECK-PAR1 -// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ -// RUN: FileCheck %s --check-prefix=CHECK-PAR2 -// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" 
| \ -// RUN: FileCheck %s --check-prefix=CHECK-PAR3 -// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ -// RUN: FileCheck %s --check-prefix=CHECK-PAR4 +// FIXME: we do not support vectorization/parallel loops in loop emitter right now +// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \ +// R_U_N: FileCheck %s --check-prefix=CHECK-PAR1 +// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ +// R_U_N: FileCheck %s --check-prefix=CHECK-PAR2 +// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \ +// R_U_N: FileCheck %s --check-prefix=CHECK-PAR3 +// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ +// R_U_N: FileCheck %s --check-prefix=CHECK-PAR4 #DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] @@ -150,8 +151,7 @@ func.func @scale_ss(%scale: f32, // // CHECK-PAR4-LABEL: func @matvec // CHECK-PAR4: scf.parallel -// CHECK-PAR4: scf.parallel -// CHECK-PAR4: scf.reduce +// CHECK-PAR4: scf.for // CHECK-PAR4: return // func.func @matvec(%arga: tensor<16x32xf32, #CSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir deleted file mode 100644 index 8ba66d2c92ae1..0000000000000 --- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir +++ /dev/null @@ -1,63 +0,0 @@ -// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ -// RUN: FileCheck %s - -#CSR = #sparse_tensor.encoding<{ - dimLevelType = [ "dense", "compressed" ] -}> - -#trait_matvec = { - indexing_maps = [ - affine_map<(i,j) -> (i,j)>, // A - affine_map<(i,j) -> (j)>, // b - affine_map<(i,j) -> (i)> // x (out) - ], - iterator_types = ["parallel", "reduction"], - doc = "x(i) += A(i,j) * b(j)" -} -// CHECK-LABEL: func.func @matvec( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, 
#sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>, -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>, -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> { -// CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index -// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index -// CHECK: %[[TMP_0:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index} -// CHECK: %[[TMP_1:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index} -// CHECK: %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]] -// CHECK: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : memref<32xf32> -// CHECK: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : memref<16xf32> -// CHECK: scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) { -// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> -// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref -// CHECK: %[[TMP_8:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index -// CHECK: %[[TMP_9:.*]] = memref.load %[[TMP_0]][%[[TMP_8]]] : memref -// CHECK: %[[TMP_10:.*]] = scf.parallel (%[[TMP_arg4:.*]]) = (%[[TMP_7]]) to (%[[TMP_9]]) step (%[[TMP_c1]]) init (%[[TMP_6]]) -> f32 { -// CHECK: %[[TMP_11:.*]] = memref.load %[[TMP_1]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_2]][%[[TMP_arg4]]] : memref -// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_3]][%[[TMP_11]]] : memref<32xf32> -// CHECK: %[[TMP_14:.*]] = arith.mulf %[[TMP_12]], %[[TMP_13]] : f32 -// CHECK: scf.reduce(%[[TMP_14]]) : f32 { -// CHECK: ^bb0(%[[TMP_arg5:.*]]: f32, %[[TMP_arg6:.*]]: f32): -// CHECK: %[[TMP_15:.*]] = arith.addf %[[TMP_arg5]], %[[TMP_arg6]] : f32 -// CHECK: scf.reduce.return %[[TMP_15]] : f32 -// CHECK: } -// CHECK: scf.yield -// CHECK: } -// CHECK: memref.store %[[TMP_10]], %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> -// CHECK: scf.yield -// CHECK: } -// 
CHECK: %[[TMP_5:.*]] = bufferization.to_tensor %[[TMP_4]] : memref<16xf32> -// CHECK: return %[[TMP_5]] : tensor<16xf32> -func.func @matvec(%arga: tensor<16x32xf32, #CSR>, - %argb: tensor<32xf32>, - %argx: tensor<16xf32>) -> tensor<16xf32> { - %0 = linalg.generic #trait_matvec - ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { - ^bb(%A: f32, %b: f32, %x: f32): - %0 = arith.mulf %A, %b : f32 - %1 = arith.addf %0, %x : f32 - linalg.yield %1 : f32 - } -> tensor<16xf32> - return %0 : tensor<16xf32> -} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir index 459b0e13667f6..c12d2b9b913e4 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir @@ -2,14 +2,6 @@ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// -// Do the same run, but now with parallelization. -// -// RUN: mlir-opt %s --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ -// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ -// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ -// RUN: FileCheck %s - #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir index adc0b261f04d3..59e7f33c22c88 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -4,16 +4,6 @@ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// -// Do the same run, but now with parallelization. 
-// -// RUN: mlir-opt %s \ -// RUN: --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ -// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ -// RUN: mlir-cpu-runner \ -// RUN: -e entry -entry-point-result=void \ -// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ -// RUN: FileCheck %s !Filename = !llvm.ptr From 058f727a98579c97130b8483cdf50a13ffa9692d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 17 Oct 2022 12:20:02 -0700 Subject: [PATCH 438/516] InstCombine: Add baseline checks for fdiv --- llvm/test/Transforms/InstCombine/fdiv.ll | 55 ++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index d0c0b66b813ad..38557c326d129 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -937,3 +937,58 @@ define <2 x half> @powi_recip(<2 x half> %x, i32 %y) { %r = fdiv reassoc arcp nnan ninf <2 x half> , %p ret <2 x half> %r } + +define float @fdiv_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv float %x, 0.0 + ret float %fdiv +} + +; https://alive2.llvm.org/ce/z/gLBFKB +define float @fdiv_nnan_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan float %x, 0.0 + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_zero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_zero_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan <2 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define float @fdiv_nnan_zero_f32_fmf(float %x) { +; CHECK-LABEL: @fdiv_nnan_zero_f32_fmf( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz 
float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan nsz float %x, 0.0 + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_zero_v2f32_fmf(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_zero_v2f32_fmf( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define float @fdiv_nnan_neg_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_neg_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan float [[X:%.*]], -0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan float %x, -0.0 + ret float %fdiv +} From ec224e3b680464101669f36baf547bf3d342bc07 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Mon, 7 Nov 2022 09:00:08 -0800 Subject: [PATCH 439/516] Revert "[mlir][sparse] fix sparse tensor rewriting patterns that do not propagate sparse tensor SSA properly." This reverts commit 70508b614e6478ba2c3fc79e935e2c68e2d79b71. 
This change depends on a reverted change that broke the windows mlir buildbot; reverting to bring remaining mlir bots to green --- .../SparseTensor/IR/SparseTensorDialect.cpp | 5 +- .../SparseTensor/Transforms/CodegenUtils.cpp | 3 - .../Transforms/SparseTensorRewriting.cpp | 119 +++++++----------- .../SparseTensor/convert_dense2sparse.mlir | 22 ++-- .../SparseTensor/convert_sparse2sparse.mlir | 10 +- .../SparseTensor/rewriting_for_codegen.mlir | 11 +- .../SparseTensor/sparse_concat_codegen.mlir | 27 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 104 +++++++-------- 8 files changed, 120 insertions(+), 181 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 693af03a94cb5..bfd38e12ea36c 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -603,12 +603,9 @@ void ForeachOp::build( std::fill_n(std::back_inserter(blockArgTypes), rank, builder.getIndexType()); // Followed by one value. blockArgTypes.push_back(rtp.getElementType()); - // Followed by reduction variable. - blockArgTypes.append(initArgs.getTypes().begin(), initArgs.getTypes().end()); SmallVector blockArgLocs; - std::fill_n(std::back_inserter(blockArgLocs), blockArgTypes.size(), - tensor.getLoc()); + std::fill_n(std::back_inserter(blockArgLocs), rank + 1, tensor.getLoc()); OpBuilder::InsertionGuard guard(builder); auto ®ion = *result.regions.front(); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 032d8026b2668..1e9cadd13e156 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -880,9 +880,6 @@ Value mlir::sparse_tensor::genValueForDense(OpBuilder &builder, Location loc, return val; } -// FIXME: -// 1. Dense tensors loop should be generated by loop emitter. 
-// 2. Support reduction variables to propagate SSA chains properly. void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop( OpBuilder &builder, Location loc, Value src, unsigned rank, function_ref bodyBuilder) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index d0613c09503c0..7747fd73aa9bb 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -356,8 +356,8 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { RankedTensorType cooTp = getUnorderedCOOFromType(dstTp); auto cooBuffer = rewriter.create(loc, cooTp, dstDynSizes).getResult(); - ForeachOp foreachOp = rewriter.create( - loc, srcTensor, cooBuffer, + rewriter.create( + loc, srcTensor, llvm::None, [&](OpBuilder &builder, Location loc, ValueRange args, Value v, ValueRange reduc) { SmallVector srcIndices; @@ -368,11 +368,11 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { } translateIndicesArray(builder, loc, op.getReassociationIndices(), srcIndices, srcSizes, dstSizes, dstIndices); - auto t = builder.create(loc, v, reduc.front(), dstIndices); - builder.create(loc, t); + builder.create(loc, v, cooBuffer, dstIndices); + builder.create(loc); }); - auto t = rewriter.create(loc, foreachOp.getResult(0), true); - rewriter.replaceOpWithNewOp(op, dstTp, t); + + rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); return success(); } }; @@ -442,14 +442,13 @@ struct ConcatenateRewriter : public OpRewritePattern { rewriter.create(loc, cooTp, ValueRange()).getResult(); Value offset = constantIndex(rewriter, loc, 0); - ForeachOp foreachOp; for (Value input : op.getInputs()) { // Builds the indexing map. // Build a for op for each input tensor to append new values into the // output tensor. 
- foreachOp = rewriter.create( - loc, input, cooBuffer, + rewriter.create( + loc, input, llvm::None, [&](OpBuilder &builder, Location loc, ValueRange args, Value v, ValueRange reduc) { SmallVector indices; @@ -462,8 +461,8 @@ struct ConcatenateRewriter : public OpRewritePattern { idx = builder.create(loc, idx, offset); indices.push_back(idx); } - auto t = builder.create(loc, v, reduc.front(), indices); - builder.create(loc, t); + builder.create(loc, v, cooBuffer, indices); + builder.create(loc); }); // Accumulates the offset. Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -472,10 +471,7 @@ struct ConcatenateRewriter : public OpRewritePattern { assert(!ShapedType::isDynamic(d)); offset = rewriter.create(loc, offset, constantIndex(rewriter, loc, d)); - cooBuffer = foreachOp.getResult(0); } - - cooBuffer = rewriter.create(loc, cooBuffer, true); rewriter.replaceOpWithNewOp(op, rtp, cooBuffer); return success(); } @@ -606,8 +602,8 @@ struct ConvertRewriter : public OpRewritePattern { srcTp = getUnorderedCOOFromType(srcTp); tmpCoo = rewriter.create(loc, srcTp, dynSrcSizes).getResult(); - auto foreachOp = rewriter.create( - loc, src, tmpCoo, + rewriter.create( + loc, src, llvm::None, [&](OpBuilder &builder, Location loc, ValueRange args, Value v, ValueRange reduc) { SmallVector indices; @@ -615,10 +611,10 @@ struct ConvertRewriter : public OpRewritePattern { uint64_t dim = toStoredDim(encSrc, i); indices.push_back(args[dim]); } - auto t = builder.create(loc, v, reduc.front(), indices); - builder.create(loc, t); + builder.create(loc, v, tmpCoo, indices); + builder.create(loc); }); - src = rewriter.create(loc, foreachOp.getResult(0), true); + src = tmpCoo; } // Sort the COO tensor so that its elements are ordered via increasing @@ -657,31 +653,29 @@ struct ConvertRewriter : public OpRewritePattern { getDynamicSizes(dstTp, srcSizes, dynDstSizes); Value dst = rewriter.create(loc, dstTp, 
dynDstSizes).getResult(); - auto foreachOp = rewriter.create( - loc, src, dst, - [&](OpBuilder &builder, Location loc, ValueRange args, Value v, - ValueRange reduc) { - SmallVector indices; - for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { - uint64_t dim = toStoredDim(encDst, i); - indices.push_back(args[dim]); - } - auto t = builder.create(loc, v, reduc.front(), indices); - builder.create(loc, t); - }); + rewriter.create(loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, + ValueRange args, Value v, ValueRange reduc) { + SmallVector indices; + for (int64_t i = 0, e = srcTp.getRank(); i < e; + i++) { + uint64_t dim = toStoredDim(encDst, i); + indices.push_back(args[dim]); + } + builder.create(loc, v, dst, indices); + builder.create(loc); + }); - // Release the temporary COO if it is created. Note that tmpCoo is - // invalidated due to foreach and updated to src. + // Release the temporary COO if it is created. if (tmpCoo) - rewriter.create(loc, src); + rewriter.create(loc, tmpCoo); // Directly replace op with dst results in bufferization error message // "sparse tensor allocation should not escape function". // As such, we insert a trivial tensor convert which will be removed by // codegen. rewriter.setInsertionPointAfter(op); - auto t = rewriter.create(loc, foreachOp.getResult(0), true); - rewriter.replaceOpWithNewOp(op, dstTp, t); + rewriter.replaceOpWithNewOp(op, dstTp, dst); return success(); } }; @@ -700,8 +694,6 @@ struct ForeachRewriter : public OpRewritePattern { int64_t rank = rtp.getRank(); auto enc = getSparseTensorEncoding(rtp); - SmallVector reduc = op.getInitArgs(); - // 1. Generates loop for the sparse input. SparseTensorLoopEmitter loopEmitter(ValueRange{input}); loopEmitter.initializeLoopEmit(rewriter, loc); @@ -709,9 +701,7 @@ struct ForeachRewriter : public OpRewritePattern { // TODO: provide utility function for loop sequences that only contains // one for loop? 
loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast(i)); - // Note that reduc will be taken care of by loop emitter and get updated - // in place. - loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i, reduc); + loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i); } SmallVector coords; @@ -726,7 +716,15 @@ struct ForeachRewriter : public OpRewritePattern { : rewriter.create(loc, vals, coords); // 2. Inline the block in the foreach operator. + Block::iterator inlinePos = rewriter.getInsertionPoint(); Block *srcBlock = op.getBody(); + // Remove sparse_tensor.yield. + rewriter.eraseOp(srcBlock->getTerminator()); + + for (int64_t i = 0; i < rank; i++) { + loopEmitter.exitCurrentLoop(rewriter, loc); + loopEmitter.exitCurrentLoopSeq(); + } SmallVector args; // Remap coordinates. @@ -736,33 +734,11 @@ struct ForeachRewriter : public OpRewritePattern { } // Remap value. args.push_back(val); - // Remap reduction variables. - args.append(reduc); - - // Remove sparse_tensor.yield. - SmallVector reducValue = srcBlock->getTerminator()->getOperands(); - rewriter.eraseOp(srcBlock->getTerminator()); // Inline body. - if (!reducValue.empty()) { - rewriter.mergeBlocks(srcBlock, rewriter.getBlock(), args); - } else { - // This is annoying, since scf.for inserts a implicit yield op when - // there is no reduction variable upon creation, in this case we need to - // merge the block *before* the yield op. - rewriter.mergeBlockBefore(srcBlock, &*rewriter.getInsertionPoint(), args); - } - - for (int64_t i = 0; i < rank; i++) { - // Link the reduction chain. Note that loop emitter update the reducValue - // in place. - loopEmitter.exitCurrentLoop(rewriter, loc, reducValue); - loopEmitter.exitCurrentLoopSeq(); - } - - // Replace the foreach operator with the value returned by the outtermost - // for loop. - rewriter.replaceOp(op, reducValue); + rewriter.mergeBlockBefore(srcBlock, &*inlinePos, args); + // delete the foreach operator. 
+ rewriter.eraseOp(op); return success(); } }; @@ -825,8 +801,7 @@ struct NewRewriter : public OpRewritePattern { .getResult(0); Type eltTp = dstTp.getElementType(); Value value = genAllocaScalar(rewriter, loc, eltTp); - scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1, - ArrayRef(cooBuffer)); + scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1); rewriter.setInsertionPointToStart(forOp.getBody()); SmallString<18> getNextFuncName{"getSparseTensorReaderNext", @@ -841,17 +816,13 @@ struct NewRewriter : public OpRewritePattern { loc, indices, constantIndex(rewriter, loc, i))); } Value v = rewriter.create(loc, value); - auto t = rewriter.create(loc, v, forOp.getRegionIterArg(0), - indicesArray); - rewriter.create(loc, ArrayRef(t)); + rewriter.create(loc, v, cooBuffer, indicesArray); rewriter.setInsertionPointAfter(forOp); - // Link SSA chain. - cooBuffer = forOp.getResult(0); // Release the sparse tensor reader. createFuncCall(rewriter, loc, "delSparseTensorReader", {}, {reader}, EmitCInterface::Off); - cooBuffer = rewriter.create(loc, cooBuffer, true); + Value newOp = rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); // Release the unordered COO tensor buffer. diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir index cb1f16ef2cd20..d67e11b92dd9c 100644 --- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir @@ -116,7 +116,6 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V:.*]] = tensor.extract %[[A]]{{\[}}%[[FI]], %[[FJ]]] : tensor<2x4xf64> // CHECK-RWT: %[[NZ:.*]] = arith.cmpf une, %[[V]], %[[F0]] : f64 // CHECK-RWT: scf.if %[[NZ]] { -// // FIXME: the SSA chain is broken here! 
// CHECK-RWT: %{{.*}} = sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[FI]], %[[FJ]]] // CHECK-RWT: } // CHECK-RWT: } @@ -127,13 +126,11 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V2:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]], %[[I1]] jointly %[[V2]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) -// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64, %[[R0:.*]]: tensor -// CHECK-RWT: %[[RET:.*]] = sparse_tensor.insert %[[FV]] into %[[R0]]{{\[}}%[[FI0]], %[[FI1]]] -// CHECK-RWT: sparse_tensor.yield %[[RET]] +// CHECK-RWT: sparse_tensor.foreach in %[[COO]] +// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): +// CHECK-RWT: sparse_tensor.insert %[[FV]] into %[[DST]]{{\[}}%[[FI0]], %[[FI1]]] // CHECK-RWT: } -// CHECK-RWT: %[[NT:.*]] = sparse_tensor.load %[[NEW_T]] hasInserts -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[NT]] +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { @@ -182,7 +179,6 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[I1r:.*]] = tensor.extract %[[SI]]{{\[}}%[[FI]], %[[C1]]] : tensor<2x2xi64> // CHECK-RWT: %[[I1:.*]] = arith.index_cast %[[I1r]] : i64 to index // CHECK-RWT: %[[V:.*]] = tensor.extract %[[SV]]{{\[}}%[[FI]]] : tensor<2xf32> -// // FIXME: the SSA chain is broken here! 
// CHECK-RWT: sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[I0]], %[[I1]]] // CHECK-RWT: } // CHECK-RWT: %[[TI0:.*]] = sparse_tensor.indices %[[COO]] {dimension = 0 : index} @@ -191,13 +187,11 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[TV:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[TI0]], %[[TI1]] jointly %[[TV]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) -// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32, %[[R0:.*]]: tensor -// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.insert %[[F2V]] into %[[R0]]{{\[}}%[[F2I0]], %[[F2I1]]] -// CHECK-RWT: sparse_tensor.yield %[[NEW_T]] +// CHECK-RWT: sparse_tensor.foreach in %[[COO]] +// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32): +// CHECK-RWT: sparse_tensor.insert %[[F2V]] into %[[DST]]{{\[}}%[[F2I0]], %[[F2I1]]] // CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.load %[[RET]] hasInserts -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[T]] +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<8x7xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{ diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir index 17145f8d37380..92f9e46b90938 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir @@ -94,13 +94,11 @@ func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[A]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]] jointly %[[V]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor(%[[D]]) -// 
CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[A]] init(%[[DST]]) -// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32, %[[T:.*]]: tensor> func.func @sparse_convert(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir index 3a6cf999df90a..79b616dec8304 100644 --- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir @@ -18,19 +18,18 @@ // CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) // CHECK: %[[VB:.*]] = memref.alloca() -// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) +// CHECK: scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] { // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) // CHECK: %[[E0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[E1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[V:.*]] = memref.load %[[VB]][] -// CHECK: %[[T1:.*]] = sparse_tensor.insert %[[V]] into %[[A2]]{{\[}}%[[E0]], %[[E1]]] -// CHECK: scf.yield %[[T1]] +// CHECK: sparse_tensor.insert %[[V]] into %[[T]]{{\[}}%[[E0]], %[[E1]]] // CHECK: } // CHECK: call @delSparseTensorReader(%[[R]]) -// CHECK: %[[T3:.*]] = sparse_tensor.load %[[T2]] hasInserts -// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T3]] -// CHECK: bufferization.dealloc_tensor %[[T3]] +// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T]] +// CHECK: bufferization.dealloc_tensor %[[T]] // CHECK: return %[[R]] +// CHECK: } func.func @sparse_new(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir index 717819bd0cb16..7280c6f5e7ba3 100644 --- 
a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir @@ -19,18 +19,16 @@ // CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]]) +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] { // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]]) +// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: %[[NEW_1:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor -// CHECK: scf.yield %[[NEW_1]] +// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor // CHECK: } -// CHECK: scf.yield %[[RET_4]] // CHECK: } // CHECK: %[[TMP_8:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_9:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor @@ -39,19 +37,17 @@ // CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_13:.*]] = memref.load 
%[[TMP_8]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]]) +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] { // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: %[[RET_5:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A3:.*]] = %[[A2]]) +// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: %[[NEW_2:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor -// CHECK: scf.yield %[[NEW_2]] +// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor // CHECK: } -// CHECK: scf.yield %[[RET_5]] // CHECK: } // CHECK: %[[TMP_15:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_16:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor @@ -60,22 +56,19 @@ // CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] 
iter_args(%[[A4:.*]] = %[[RET_2]]) +// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] { // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: %[[RET_6:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A5:.*]] = %[[A4]]) +// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: %[[NEW_3:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor -// CHECK: scf.yield %[[NEW_3]] +// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor // CHECK: } -// CHECK: scf.yield %[[RET_6]] // CHECK: } -// CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts -// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_23]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_0]] : tensor<9x4xf64, #sparse_tensor // CHECK: return %[[TMP_22]] : tensor<9x4xf64, #sparse_tensor func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index 94ee50197fa9c..c162bacffac96 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -52,16 +52,14 @@ // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load 
%[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]]) +// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[DI0:.*]] = arith.divui %[[SI]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = arith.remui %[[SI]], %[[C10]] : index -// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] -// CHECK-RWT: scf.yield %[[NT:.*]] +// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] // CHECK-RWT: } -// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] // CHECK-RWT: return %[[T]] : tensor<10x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> // func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10xf64, #SparseMatrix> { @@ -113,28 +111,25 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor() // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[A0:.*]] = %[[B]]) -// CHECK-RWT: 
%[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[A1:.*]] = %[[A0]]) -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index -// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index -// CHECK-RWT: %[[R1:.*]] = sparse_tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]] -// CHECK-RWT scf.yield %[[R1]] -// CHECK-RWT } -// CHECK-RWT scf.yield %[[RET_1]] -// CHECK-RWT: } -// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { +// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index +// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], 
%[[SI1]] : index +// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] +// CHECK-RWT } +// CHECK-RWT: } +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] // CHECK-RWT: return %[[T]] : tensor<100xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> // func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<100xf64, #SparseVector> { @@ -196,7 +191,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]]) +// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[T1:.*]] = arith.muli %[[DD0]], %[[C10]] : index @@ -205,11 +200,9 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[T3:.*]] = arith.remui %[[SI]], %[[T2]] : index // CHECK-RWT: %[[T4:.*]] = arith.divui %[[T2]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = arith.divui %[[T3]], %[[T4]] : index -// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] -// CHECK-RWT: scf.yield %[[NT]] +// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] // CHECK-RWT: } -// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] // CHECK-RWT: return %[[T]] : tensor> // func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor { @@ -267,31 +260,28 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-RWT: %[[B:.*]] = 
bufferization.alloc_tensor(%[[DD0]]) // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R0:.*]] = %[[B]]) -// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[R1:.*]] = %[[R0]]) -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index -// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index -// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index -// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index -// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index -// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]] -// CHECK-RWT scf.yield %[[NT]] -// CHECK-RWT } -// CHECK-RWT scf.yield %[[RET_1]] -// CHECK-RWT: } -// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 
1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { +// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index +// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index +// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index +// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index +// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index +// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] +// CHECK-RWT } +// CHECK-RWT: } +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] // CHECK-RWT: return %[[T]] : tensor> // func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor { From 90ad3e3c02e92cabfc7cf1f0b552ddca73d54cc8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Nov 2022 09:03:23 -0800 Subject: [PATCH 440/516] [IR] Allow available_externally GlobalAlias GlobalVariable and Function can be available_externally. GlobalAlias is used similarly. Allowing available_externally is a natural extension and helps ThinLTO discard GlobalAlias in a non-prevailing COMDAT (see D135427). For now, available_externally GlobalAlias must point to an available_externally GlobalValue (not ConstantExpr). 
Differential Revision: https://reviews.llvm.org/D137441 --- llvm/docs/LangRef.rst | 11 +++++++---- llvm/include/llvm/IR/GlobalAlias.h | 4 ++-- llvm/lib/IR/Verifier.cpp | 15 ++++++++++++--- llvm/test/Verifier/alias.ll | 14 +++++++++++++- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 5d5916abbfef1..0006dab9b2d29 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -886,8 +886,9 @@ Syntax:: [, partition "name"] The linkage must be one of ``private``, ``internal``, ``linkonce``, ``weak``, -``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers -might not correctly handle dropping a weak symbol that is aliased. +``linkonce_odr``, ``weak_odr``, ``external``, ``available_externally``. Note +that some system linkers might not correctly handle dropping a weak symbol that +is aliased. Aliases that are not ``unnamed_addr`` are guaranteed to have the same address as the aliasee expression. ``unnamed_addr`` ones are only guaranteed to point @@ -906,8 +907,10 @@ some can only be checked when producing an object file: intermediate alias being overridden cannot be represented in an object file. -* No global value in the expression can be a declaration, since that - would require a relocation, which is not possible. +* If the alias has the ``available_externally`` linkage, the aliasee must be an + ``available_externally`` global value; otherwise the aliasee can be an + expression but no global value in the expression can be a declaration, since + that would require a relocation, which is not possible. 
* If either the alias or the aliasee may be replaced by a symbol outside the module at link time or runtime, any optimization cannot replace the alias with diff --git a/llvm/include/llvm/IR/GlobalAlias.h b/llvm/include/llvm/IR/GlobalAlias.h index 01134448a8fa7..de405da5ca231 100644 --- a/llvm/include/llvm/IR/GlobalAlias.h +++ b/llvm/include/llvm/IR/GlobalAlias.h @@ -93,8 +93,8 @@ class GlobalAlias : public GlobalValue, public ilist_node { } static bool isValidLinkage(LinkageTypes L) { - return isExternalLinkage(L) || isLocalLinkage(L) || - isWeakLinkage(L) || isLinkOnceLinkage(L); + return isExternalLinkage(L) || isLocalLinkage(L) || isWeakLinkage(L) || + isLinkOnceLinkage(L) || isAvailableExternallyLinkage(L); } // Methods for support type inquiry through isa, cast, and dyn_cast: diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 5e41fb1261575..002b5210830fe 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -817,9 +817,18 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) { void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, const GlobalAlias &GA, const Constant &C) { - if (const auto *GV = dyn_cast(&C)) { - Check(!GV->isDeclarationForLinker(), "Alias must point to a definition", + if (GA.hasAvailableExternallyLinkage()) { + Check(isa(C) && + cast(C).hasAvailableExternallyLinkage(), + "available_externally alias must point to available_externally " + "global value", &GA); + } + if (const auto *GV = dyn_cast(&C)) { + if (!GA.hasAvailableExternallyLinkage()) { + Check(!GV->isDeclarationForLinker(), "Alias must point to a definition", + &GA); + } if (const auto *GA2 = dyn_cast(GV)) { Check(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA); @@ -848,7 +857,7 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, void Verifier::visitGlobalAlias(const GlobalAlias &GA) { Check(GlobalAlias::isValidLinkage(GA.getLinkage()), "Alias should have private, internal, linkonce, 
weak, linkonce_odr, " - "weak_odr, or external linkage!", + "weak_odr, external, or available_externally linkage!", &GA); const Constant *Aliasee = GA.getAliasee(); Check(Aliasee, "Aliasee cannot be NULL!", &GA); diff --git a/llvm/test/Verifier/alias.ll b/llvm/test/Verifier/alias.ll index e14406550dbbd..b7675a18c0ed1 100644 --- a/llvm/test/Verifier/alias.ll +++ b/llvm/test/Verifier/alias.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s --implicit-check-not=alias --implicit-check-not=Alias declare void @f() @@ -31,3 +31,15 @@ define available_externally void @f2() { @test3_c = alias i32, i32* @test3_b ; CHECK: Alias cannot point to an interposable alias ; CHECK-NEXT: i32* @test3_c + +@test4_a = available_externally global i32 42 +@test4_b = available_externally alias i32, i32* @test4_a +@test4_c = available_externally alias void(), void()* @f2 +@test4_d = available_externally alias i32, i32* @test4_b + +@test4_e = available_externally alias i32, i32* @test3_a +@test4_f = available_externally alias i32, inttoptr (i64 sub (i64 ptrtoint (i32* @test4_a to i64), i64 ptrtoint (i32* @test4_a to i64)) to i32*) +; CHECK: available_externally alias must point to available_externally global value +; CHECK-NEXT: i32* @test4_e +; CHECK: available_externally alias must point to available_externally global value +; CHECK-NEXT: i32* @test4_f From ddb68f36ae3a7ecb833e3ddf2ab8afe6ed509651 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Mon, 7 Nov 2022 08:25:19 -0800 Subject: [PATCH 441/516] [flang] Initial support for FastMathAttr setup in FirOpBuilder. Provide FirOpBuilder::setFastMathFlags() to configure FastMathFlags for the builder. Set FastMathAttr for operations based on FirOpBuilder configuration via mlir::OpBuilder::Listener. This is a little bit hacky solution, because we lose the ability to hook other listeners to FirOpBuilder. 
There are also potential issues with OpBuilder::clone() - the hook will be invoked for cloned operations and will effectively overwrite FastMathAttr with the ones configured in FirOpBuilder, which should not be happening. We should teach mlir::OpBuilder about FastMathAttr setup in future. Reviewed By: jeanPerier, kiranchandramohan Differential Revision: https://reviews.llvm.org/D137390 --- .../flang/Optimizer/Builder/FIRBuilder.h | 35 +++++++++++- flang/lib/Optimizer/Builder/FIRBuilder.cpp | 12 ++++ .../Optimizer/Builder/FIRBuilderTest.cpp | 55 +++++++++++++++++++ 3 files changed, 99 insertions(+), 3 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index 49fc22e830b4e..5a43b1705c749 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -35,13 +35,22 @@ class BoxValue; /// Extends the MLIR OpBuilder to provide methods for building common FIR /// patterns. -class FirOpBuilder : public mlir::OpBuilder { +class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { public: explicit FirOpBuilder(mlir::Operation *op, const fir::KindMapping &kindMap) - : OpBuilder{op}, kindMap{kindMap} {} + : OpBuilder{op, /*listener=*/this}, kindMap{kindMap} {} explicit FirOpBuilder(mlir::OpBuilder &builder, const fir::KindMapping &kindMap) - : OpBuilder{builder}, kindMap{kindMap} {} + : OpBuilder{builder}, kindMap{kindMap} { + setListener(this); + } + + // The listener self-reference has to be updated in case of copy-construction. + FirOpBuilder(const FirOpBuilder &other) + : OpBuilder{other}, kindMap{other.kindMap}, fastMathFlags{ + other.fastMathFlags} { + setListener(this); + } /// Get the current Region of the insertion point. 
mlir::Region &getRegion() { return *getBlock()->getParent(); } @@ -393,11 +402,31 @@ class FirOpBuilder : public mlir::OpBuilder { mlir::Value ub, mlir::Value step, mlir::Type type); + /// Set default FastMathFlags value for all operations + /// supporting mlir::arith::FastMathAttr that will be created + /// by this builder. + void setFastMathFlags(mlir::arith::FastMathFlags flags) { + fastMathFlags = flags; + } + /// Dump the current function. (debug) LLVM_DUMP_METHOD void dumpFunc(); private: + /// Set attributes (e.g. FastMathAttr) to \p op operation + /// based on the current attributes setting. + void setCommonAttributes(mlir::Operation *op) const; + + /// FirOpBuilder hook for creating new operation. + void notifyOperationInserted(mlir::Operation *op) override { + setCommonAttributes(op); + } + const KindMapping &kindMap; + + /// FastMathFlags that need to be set for operations that support + /// mlir::arith::FastMathAttr. + mlir::arith::FastMathFlags fastMathFlags{}; }; } // namespace fir diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 984c2459cac66..59cc0583c24c5 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -571,6 +571,18 @@ mlir::Value fir::FirOpBuilder::genExtentFromTriplet(mlir::Location loc, return create(loc, cmp, div, zero); } +void fir::FirOpBuilder::setCommonAttributes(mlir::Operation *op) const { + auto fmi = mlir::dyn_cast(*op); + if (!fmi) + return; + // TODO: use fmi.setFastMathFlagsAttr() after D137114 is merged. + // For now set the attribute by the name. 
+ llvm::StringRef arithFMFAttrName = fmi.getFastMathAttrName(); + if (fastMathFlags != mlir::arith::FastMathFlags::none) + op->setAttr(arithFMFAttrName, mlir::arith::FastMathFlagsAttr::get( + op->getContext(), fastMathFlags)); +} + //===--------------------------------------------------------------------===// // ExtendedValue inquiry helper implementation //===--------------------------------------------------------------------===// diff --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp index 83d3defd3d067..9defe496b9c05 100644 --- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp +++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp @@ -528,3 +528,58 @@ TEST_F(FIRBuilderTest, getBaseTypeOf) { EXPECT_TRUE(fir::isDerivedWithLenParameters(array)); } } + +TEST_F(FIRBuilderTest, genArithFastMath) { + auto builder = getBuilder(); + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); + + auto realTy = mlir::FloatType::getF32(ctx); + auto arg = builder.create(loc, realTy); + + // Test that FastMathFlags is 'none' by default. + mlir::Operation *op1 = builder.create(loc, arg, arg); + auto op1_fmi = + mlir::dyn_cast_or_null(op1); + EXPECT_TRUE(op1_fmi); + auto op1_fmf = op1_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op1_fmf, arith::FastMathFlags::none); + + // Test that the builder is copied properly. + fir::FirOpBuilder builder_copy(builder); + + arith::FastMathFlags FMF1 = + arith::FastMathFlags::contract | arith::FastMathFlags::reassoc; + builder.setFastMathFlags(FMF1); + arith::FastMathFlags FMF2 = + arith::FastMathFlags::nnan | arith::FastMathFlags::ninf; + builder_copy.setFastMathFlags(FMF2); + + // Modifying FastMathFlags for the copy must not affect the original builder. 
+ mlir::Operation *op2 = builder.create(loc, arg, arg); + auto op2_fmi = + mlir::dyn_cast_or_null(op2); + EXPECT_TRUE(op2_fmi); + auto op2_fmf = op2_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op2_fmf, FMF1); + + // Modifying FastMathFlags for the original builder must not affect the copy. + mlir::Operation *op3 = + builder_copy.create(loc, arg, arg); + auto op3_fmi = + mlir::dyn_cast_or_null(op3); + EXPECT_TRUE(op3_fmi); + auto op3_fmf = op3_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op3_fmf, FMF2); + + // Test that the builder copy inherits FastMathFlags from the original. + fir::FirOpBuilder builder_copy2(builder); + + mlir::Operation *op4 = + builder_copy2.create(loc, arg, arg); + auto op4_fmi = + mlir::dyn_cast_or_null(op4); + EXPECT_TRUE(op4_fmi); + auto op4_fmf = op4_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op4_fmf, FMF1); +} From b62c81b836512a5d38717f47d18ef5ddf32c3e2a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Nov 2022 10:44:54 -0500 Subject: [PATCH 442/516] [VectorCombine] add test with non-canonical shuffle mask; NFC D137341 --- .../Transforms/VectorCombine/X86/load-widening.ll | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll index 384c6fa474a00..40f610d3343c6 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll @@ -339,3 +339,14 @@ define <4 x i32> @load_v2i32_v4i32_non_canonical_mask(ptr dereferenceable(16) %p %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> ret <4 x i32> %s } + +define <4 x i32> @load_v2i32_v4i32_non_canonical_mask_commute(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask_commute( +; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> poison, <2 x i32> [[L]], <4 x i32> +; CHECK-NEXT: ret 
<4 x i32> [[S]] +; + %l = load <2 x i32>, ptr %p, align 1 + %s = shufflevector <2 x i32> poison, <2 x i32> %l, <4 x i32> + ret <4 x i32> %s +} From de36d39e24249feabe18f845b1868a16b798110a Mon Sep 17 00:00:00 2001 From: Miguel Saldivar Date: Mon, 7 Nov 2022 11:26:30 -0500 Subject: [PATCH 443/516] [InstCombine] Avoid passing pow attributes to sqrt As described in issue #58475, we could pass the attributes of pow to sqrt and crash. Differential Revision: https://reviews.llvm.org/D137454 --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 4 ++-- llvm/test/Transforms/InstCombine/pow-to-sqrt.ll | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/pow-to-sqrt.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6dcf5a3c68136..9b5d0b8f5daa7 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -2164,8 +2164,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { return nullptr; ExpoF = &ExpoI; - Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), M, B, TLI); + Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), M, + B, TLI); if (!Sqrt) return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/pow-to-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-to-sqrt.ll new file mode 100644 index 0000000000000..2805456c89e82 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pow-to-sqrt.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; This is a check to assure the attributes of `pow` do +; not get passed to sqrt. 
+ +define void @pow_to_sqrt(double %x) { +; CHECK-LABEL: @pow_to_sqrt( +; CHECK-NEXT: [[SQRT:%.*]] = call afn double @sqrt(double [[X:%.*]]) +; CHECK-NEXT: ret void +; + %call = call afn double @pow(double %x, double 1.5) + ret void +} + +declare double @pow(double noundef, double noundef) From 3676a86a4322e8c2b9c541f057b5d3704146b8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Mon, 7 Nov 2022 14:46:58 +0100 Subject: [PATCH 444/516] [cmake] Add missing CMakePushCheckState include to FindLibEdit.cmake Add the missing include to fix an error when `cmake_push_check_state()` is called and incidentally the CMakePushCheckState module is not loaded by any other check running prior to `FindLibEdit.cmake`: CMake Error at /var/no-tmpfs/portage/dev-util/lldb-15.0.4/work/cmake/Modules/FindLibEdit.cmake:24 (cmake_push_check_state): Unknown CMake command "cmake_push_check_state". Call Stack (most recent call first): cmake/modules/LLDBConfig.cmake:52 (find_package) cmake/modules/LLDBConfig.cmake:59 (add_optional_dependency) CMakeLists.txt:28 (include) Gentoo Bug: https://bugs.gentoo.org/880065 Differential Revision: https://reviews.llvm.org/D137555 --- cmake/Modules/FindLibEdit.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/Modules/FindLibEdit.cmake b/cmake/Modules/FindLibEdit.cmake index 7e62d4d839ae1..de8f5a2e71013 100644 --- a/cmake/Modules/FindLibEdit.cmake +++ b/cmake/Modules/FindLibEdit.cmake @@ -21,6 +21,7 @@ find_library(LibEdit_LIBRARIES NAMES edit HINTS ${PC_LIBEDIT_LIBRARY_DIRS}) include(CheckIncludeFile) if(LibEdit_INCLUDE_DIRS AND EXISTS "${LibEdit_INCLUDE_DIRS}/histedit.h") + include(CMakePushCheckState) cmake_push_check_state() list(APPEND CMAKE_REQUIRED_INCLUDES ${LibEdit_INCLUDE_DIRS}) list(APPEND CMAKE_REQUIRED_LIBRARIES ${LibEdit_LIBRARIES}) From fe2069284d10f78c6472f5934983c5740dea8039 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Mon, 7 Nov 2022 18:23:22 +0100 Subject: [PATCH 445/516] [flang] Allow 
fir.class as fir.save_result operand #0 Reviewed By: jeanPerier, PeteSteinfeld Differential Revision: https://reviews.llvm.org/D137546 --- .../flang/Optimizer/Dialect/FIRTypes.td | 2 +- flang/test/Lower/polymorphic.f90 | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIRTypes.td b/flang/include/flang/Optimizer/Dialect/FIRTypes.td index 0d06e1d118ea7..eaf43a6e908aa 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRTypes.td +++ b/flang/include/flang/Optimizer/Dialect/FIRTypes.td @@ -635,7 +635,7 @@ def AnyAddressableLike : TypeConstraint, "any addressable">; def ArrayOrBoxOrRecord : TypeConstraint, + IsBaseBoxTypePred, fir_RecordType.predicate]>, "fir.box, fir.array or fir.type">; diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index 232dfada79194..d828c0c209349 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -58,4 +58,22 @@ subroutine test_allocate_unlimited_polymorphic_non_derived() ! CHECK-LABEL: test_allocate_unlimited_polymorphic_non_derived ! CHECK-NOT: _FortranAPointerNullifyDerived ! CHECK: fir.call @_FortranAPointerAllocate + + function test_fct_ret_class() + class(p1), pointer :: test_fct_ret_class + end function + + subroutine call_fct() + class(p1), pointer :: p + p => test_fct_ret_class() + end subroutine + +! CHECK-LABEL: func.func @_QMpolymorphic_testPtest_fct_ret_class() -> !fir.class>> +! CHECK: return %{{.*}} : !fir.class>> + +! CHECK-lABEL: func.func @_QMpolymorphic_testPcall_fct() +! CHECK: %[[RESULT:.*]] = fir.alloca !fir.class>> {bindc_name = ".result"} +! CHECK: %[[CALL_RES:.*]] = fir.call @_QMpolymorphic_testPtest_fct_ret_class() : () -> !fir.class>> +! 
CHECK: fir.save_result %[[CALL_RES]] to %[[RESULT]] : !fir.class>>, !fir.ref>>> + end module From 8f3f15c1a208932689a8bdef22d6ca3d4c3408c5 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Mon, 7 Nov 2022 09:05:27 -0800 Subject: [PATCH 446/516] [flang] Configure FirOpBuilder based on math driver options. Added MathOptionsBase to share fastmath config between different components. Frontend driver translates LangOptions into MathOptionsBase. FirConverter configures FirOpBuilder using MathOptionsBase config passed to it via LoweringOptions. Depends on D137390 Reviewed By: jeanPerier Differential Revision: https://reviews.llvm.org/D137391 --- .../include/flang/Common/MathOptionsBase.def | 25 +++++++++++ flang/include/flang/Common/MathOptionsBase.h | 44 +++++++++++++++++++ flang/include/flang/Lower/LoweringOptions.h | 12 +++++ .../flang/Optimizer/Builder/FIRBuilder.h | 5 +++ flang/lib/Frontend/CompilerInvocation.cpp | 12 ++++- flang/lib/Lower/Bridge.cpp | 3 ++ flang/lib/Lower/LoweringOptions.cpp | 2 +- flang/lib/Optimizer/Builder/FIRBuilder.cpp | 12 +++++ flang/test/Lower/fast-math-arithmetic.f90 | 13 ++++++ 9 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 flang/include/flang/Common/MathOptionsBase.def create mode 100644 flang/include/flang/Common/MathOptionsBase.h create mode 100644 flang/test/Lower/fast-math-arithmetic.f90 diff --git a/flang/include/flang/Common/MathOptionsBase.def b/flang/include/flang/Common/MathOptionsBase.def new file mode 100644 index 0000000000000..64b3959a1c53e --- /dev/null +++ b/flang/include/flang/Common/MathOptionsBase.def @@ -0,0 +1,25 @@ +//===--- MathOptionsBase.def - Math options config ---------------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines math options. Users of this file must define +/// ENUM_MATHOPT macro to make use of this information. +/// +//===----------------------------------------------------------------------===// + +#ifndef ENUM_MATHOPT +# error Define the ENUM_MATHOPT macro to handle lowering options +#endif + +/// Allow fusing FP operations (e.g. create FMAs from mul/add). +ENUM_MATHOPT(FPContractEnabled, unsigned, 1, 0) + +/// Permit floating point optimizations without regard to infinities. +ENUM_MATHOPT(NoHonorInfs, unsigned, 1, 0) + +#undef ENUM_MATHOPT diff --git a/flang/include/flang/Common/MathOptionsBase.h b/flang/include/flang/Common/MathOptionsBase.h new file mode 100644 index 0000000000000..7f8ebdbee1987 --- /dev/null +++ b/flang/include/flang/Common/MathOptionsBase.h @@ -0,0 +1,44 @@ +//===- MathOptionsBase.h - Math options config ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Options controlling mathematical computations generated in FIR. +/// This is intended to be header-only implementation without extra +/// dependencies so that multiple components can use it to exchange +/// math configuration. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_COMMON_MATHOPTIONSBASE_H +#define FORTRAN_COMMON_MATHOPTIONSBASE_H + +namespace Fortran::common { + +class MathOptionsBase { +public: +#define ENUM_MATHOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ + MathOptionsBase &set##Name(Type Value) { \ + Name = static_cast(Value); \ + return *this; \ + } +#include "flang/Common/MathOptionsBase.def" + + MathOptionsBase() { +#define ENUM_MATHOPT(Name, Type, Bits, Default) set##Name(Default); +#include "flang/Common/MathOptionsBase.def" + } + +private: +#define ENUM_MATHOPT(Name, Type, Bits, Default) unsigned Name : Bits; +#include "flang/Common/MathOptionsBase.def" +}; + +} // namespace Fortran::common + +#endif // FORTRAN_COMMON_MATHOPTIONSBASE_H diff --git a/flang/include/flang/Lower/LoweringOptions.h b/flang/include/flang/Lower/LoweringOptions.h index dd297e41bded2..8105ccd7ef6b1 100644 --- a/flang/include/flang/Lower/LoweringOptions.h +++ b/flang/include/flang/Lower/LoweringOptions.h @@ -15,6 +15,8 @@ #ifndef FLANG_LOWER_LOWERINGOPTIONS_H #define FLANG_LOWER_LOWERINGOPTIONS_H +#include "flang/Common/MathOptionsBase.h" + namespace Fortran::lower { class LoweringOptionsBase { @@ -42,6 +44,16 @@ class LoweringOptions : public LoweringOptionsBase { #include "flang/Lower/LoweringOptions.def" LoweringOptions(); + + const Fortran::common::MathOptionsBase &getMathOptions() const { + return MathOptions; + } + + Fortran::common::MathOptionsBase &getMathOptions() { return MathOptions; } + +private: + /// Options for handling/optimizing mathematical computations. 
+ Fortran::common::MathOptionsBase MathOptions; }; } // namespace Fortran::lower diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index 5a43b1705c749..a28ada96ecf7a 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -16,6 +16,7 @@ #ifndef FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H #define FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H +#include "flang/Common/MathOptionsBase.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Support/KindMapping.h" @@ -409,6 +410,10 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { fastMathFlags = flags; } + /// Set default FastMathFlags value from the passed MathOptionsBase + /// config. + void setFastMathFlags(Fortran::common::MathOptionsBase options); + /// Dump the current function. (debug) LLVM_DUMP_METHOD void dumpFunc(); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index bb87ea285a265..f2180145af714 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -944,8 +944,18 @@ void CompilerInvocation::setSemanticsOpts( /// Set \p loweringOptions controlling lowering behavior based /// on the \p optimizationLevel. void CompilerInvocation::setLoweringOptions() { - const auto &codegenOpts = getCodeGenOpts(); + const CodeGenOptions &codegenOpts = getCodeGenOpts(); // Lower TRANSPOSE as a runtime call under -O0. loweringOpts.setOptimizeTranspose(codegenOpts.OptimizationLevel > 0); + + const LangOptions &langOptions = getLangOpts(); + Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions(); + // TODO: when LangOptions are finalized, we can represent + // the math related options using Fortran::commmon::MathOptionsBase, + // so that we can just copy it into LoweringOptions. 
+ mathOpts + .setFPContractEnabled(langOptions.getFPContractMode() == + LangOptions::FPM_Fast) + .setNoHonorInfs(langOptions.NoHonorInfs); } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index e9490b80566fe..6ab001b850fd2 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2884,6 +2884,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::func::FuncOp func = callee.addEntryBlockAndMapArguments(); builder = new fir::FirOpBuilder(func, bridge.getKindMap()); assert(builder && "FirOpBuilder did not instantiate"); + builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); builder->setInsertionPointToStart(&func.front()); func.setVisibility(mlir::SymbolTable::Visibility::Public); @@ -3087,6 +3088,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::FunctionType::get(context, llvm::None, llvm::None)); func.addEntryBlock(); builder = new fir::FirOpBuilder(func, bridge.getKindMap()); + assert(builder && "FirOpBuilder did not instantiate"); + builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); createGlobals(); if (mlir::Region *region = func.getCallableRegion()) region->dropAllReferences(); diff --git a/flang/lib/Lower/LoweringOptions.cpp b/flang/lib/Lower/LoweringOptions.cpp index 22247faa4cab9..9456abf0e8dea 100644 --- a/flang/lib/Lower/LoweringOptions.cpp +++ b/flang/lib/Lower/LoweringOptions.cpp @@ -14,7 +14,7 @@ namespace Fortran::lower { -LoweringOptions::LoweringOptions() { +LoweringOptions::LoweringOptions() : MathOptions{} { #define LOWERINGOPT(Name, Bits, Default) Name = Default; #define ENUM_LOWERINGOPT(Name, Type, Bits, Default) set##Name(Default); #include "flang/Lower/LoweringOptions.def" diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 59cc0583c24c5..50fc21b0f256b 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -583,6 
+583,18 @@ void fir::FirOpBuilder::setCommonAttributes(mlir::Operation *op) const { op->getContext(), fastMathFlags)); } +void fir::FirOpBuilder::setFastMathFlags( + Fortran::common::MathOptionsBase options) { + mlir::arith::FastMathFlags arithFMF{}; + if (options.getFPContractEnabled()) { + arithFMF = arithFMF | mlir::arith::FastMathFlags::contract; + } + if (options.getNoHonorInfs()) { + arithFMF = arithFMF | mlir::arith::FastMathFlags::ninf; + } + setFastMathFlags(arithFMF); +} + //===--------------------------------------------------------------------===// // ExtendedValue inquiry helper implementation //===--------------------------------------------------------------------===// diff --git a/flang/test/Lower/fast-math-arithmetic.f90 b/flang/test/Lower/fast-math-arithmetic.f90 new file mode 100644 index 0000000000000..cc7a7dcf210e3 --- /dev/null +++ b/flang/test/Lower/fast-math-arithmetic.f90 @@ -0,0 +1,13 @@ +! RUN: %flang_fc1 -emit-fir -ffp-contract=fast %s -o - 2>&1 | FileCheck --check-prefixes=CONTRACT,ALL %s +! RUN: %flang_fc1 -emit-fir -menable-no-infs %s -o - 2>&1 | FileCheck --check-prefixes=NINF,ALL %s + +! ALL-LABEL: func.func @_QPtest +subroutine test(x) + real x +! CONTRACT: arith.mulf{{.*}}, {{.*}} fastmath<[[ATTRS:contract]]> : f32 +! NINF: arith.mulf{{.*}}, {{.*}} fastmath<[[ATTRS:ninf]]> : f32 +! ALL: arith.divf{{.*}}, {{.*}} fastmath<[[ATTRS]]> : f32 +! ALL: arith.addf{{.*}}, {{.*}} fastmath<[[ATTRS]]> : f32 +! 
ALL: arith.subf{{.*}}, {{.*}} fastmath<[[ATTRS]]> : f32 + x = x * x + x / x - x +end subroutine test From 265a73043d34af25d88a238b84ed9c0c34321348 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 7 Nov 2022 18:22:05 +0100 Subject: [PATCH 447/516] Use double hashes for non-run/check lines in lld/test/ELF/basic.s --- lld/test/ELF/basic.s | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lld/test/ELF/basic.s b/lld/test/ELF/basic.s index 6b17bf75ee7bb..33c01944b7c1b 100644 --- a/lld/test/ELF/basic.s +++ b/lld/test/ELF/basic.s @@ -6,7 +6,7 @@ # RUN: | FileCheck %s # RUN: ld.lld %t -o /dev/null -# exits with return code 42 on linux +## exits with return code 42 on linux .globl _start _start: mov $60, %rax @@ -201,19 +201,19 @@ _start: # CHECK-NEXT: } # CHECK-NEXT: ] -# Test for the response file (POSIX quoting style) +## Test for the response file (POSIX quoting style) # RUN: echo " -o %t2" > %t.responsefile # RUN: ld.lld %t --rsp-quoting=posix @%t.responsefile # RUN: llvm-readobj --file-headers --sections -l --symbols %t2 \ # RUN: | FileCheck %s -# Test for the response file (Windows quoting style) +## Test for the response file (Windows quoting style) # RUN: echo " c:\blah\foo" > %t.responsefile # RUN: not ld.lld --rsp-quoting=windows %t @%t.responsefile 2>&1 | FileCheck \ # RUN: %s --check-prefix=WINRSP # WINRSP: cannot open c:\blah\foo -# Test for the response file (invalid quoting style) +## Test for the response file (invalid quoting style) # RUN: not ld.lld --rsp-quoting=patatino %t 2>&1 | FileCheck %s \ # RUN: --check-prefix=INVRSP # INVRSP: invalid response file quoting: patatino From 19a7939404a3b932c26cd2a6a29f0669acebd702 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Sat, 5 Nov 2022 04:37:45 +0100 Subject: [PATCH 448/516] [lld] Check errors from expanding response files Previously the response file expansion code would print the error, but lld would not exit, which was odd. 
lld does response file expansion in the different drivers, but it's also done in main() first, so it's enough to check there. By checking for these errors we would have caught when D136090 introduced a bug that made lld print errors for response files which contained "-rpath @foo". Differential Revision: https://reviews.llvm.org/D137477 --- lld/test/ELF/basic.s | 6 ++++++ lld/tools/lld/lld.cpp | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lld/test/ELF/basic.s b/lld/test/ELF/basic.s index 33c01944b7c1b..587fd1641500a 100644 --- a/lld/test/ELF/basic.s +++ b/lld/test/ELF/basic.s @@ -218,6 +218,12 @@ _start: # RUN: --check-prefix=INVRSP # INVRSP: invalid response file quoting: patatino +## Test erroring on a recursive response file, but only once. +# RUN: echo @%t.responsefile > %t.responsefile +# RUN: not ld.lld %t @%t.responsefile 2>&1 | FileCheck %s --check-prefix=RECRSP +# RECRSP: recursive expansion of: '{{.*}}.responsefile' +# RECRSP-NOT: recursive expansion of + # RUN: not ld.lld %t.foo -o /dev/null 2>&1 | \ # RUN: FileCheck -DMSG=%errc_ENOENT --check-prefix=MISSING %s # MISSING: cannot open {{.*}}.foo: [[MSG]] diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index b0e28d15fa29e..700c0b770e201 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -89,7 +89,9 @@ static bool isPETarget(std::vector &v) { SmallVector expandedArgs(v.data(), v.data() + v.size()); BumpPtrAllocator a; StringSaver saver(a); - cl::ExpandResponseFiles(saver, getDefaultQuotingStyle(), expandedArgs); + cl::ExpansionContext ECtx(saver.getAllocator(), getDefaultQuotingStyle()); + if (Error Err = ECtx.expandResponseFiles(expandedArgs)) + die(toString(std::move(Err))); for (auto it = expandedArgs.begin(); it + 1 != expandedArgs.end(); ++it) { if (StringRef(*it) != "-m") continue; From 99171078bb9bea3f31be948e124ec945a50e1fe1 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Mon, 7 Nov 2022 09:34:10 -0800 Subject: [PATCH 449/516] Revert
"[mlir][sparse] extend foreach operation to accept reduction arguments." This reverts commit 53d5d3401120f2aa741a73a5a9ba0ce012ca532c. This is causing a build failure on the windows mlir bot that was previously hidden by another sparse tensor change that caused failures: https://lab.llvm.org/buildbot/#/builders/13/builds/28006 --- .../SparseTensor/IR/SparseTensorOps.td | 66 +++++++------------ .../SparseTensor/IR/SparseTensorDialect.cpp | 36 ++-------- .../Transforms/SparseTensorRewriting.cpp | 59 +++++++---------- mlir/test/Dialect/SparseTensor/invalid.mlir | 45 ------------- mlir/test/Dialect/SparseTensor/roundtrip.mlir | 20 ------ 5 files changed, 54 insertions(+), 172 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 52a6aff752792..5d667448e2f37 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -896,44 +896,21 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>, def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", [SingleBlockImplicitTerminator<"YieldOp">]>, - Arguments<(ins AnyTensor:$tensor, - Variadic:$initArgs)>, - Results<(outs Variadic:$results)> { + Arguments<(ins AnyTensor:$tensor)>{ let summary = "Iterates over elements in a tensor"; let description = [{ Iterates over stored elements in a tensor (which are typically, but not always, non-zero for sparse tensors) and executes the block. - For an input tensor with rank n, the block must take n + 1 (and additional loop - carried variables as described below) arguments. The first n arguments must be - Index type, together indicating the current coordinates of the element being visited. - The last argument must have the same type as the + For an input tensor with rank n, the block must take n + 1 arguments. 
The + first n arguments must be Index type, together indicating the current coordinates + of the element being visited. The last argument must have the same type as the tensor's element type, representing the actual value loaded from the input tensor at the given coordinates. - `sparse_tensor.foreach` can also operate on loop-carried variables and returns - the final values after loop termination. The initial values of the variables are - passed as additional SSA operands to the "sparse_tensor.foreach" following the n + 1 - SSA values mentioned above (n coordinate and 1 value). - - The region must terminate with a "sparse_tensor.yield" that passes the current - values of all loop-carried variables to the next iteration, or to the - result, if at the last iteration. The number and static types of loop-carried - variables may not change with iterations. - - For example: - ```mlir - %c0 = arith.constant 0 : i32 - %ret = sparse_tensor.foreach in %0 init(%c0): tensor, i32 -> i32 do { - ^bb0(%arg1: index, %arg2: index, %arg3: i32, %iter: i32): - %sum = arith.add %iter, %arg3 - sparse_tensor.yield %sum - } - ``` - - It is important to note that foreach generated loop iterates over the stored elements - in the storage order. However, no matter what storage order is used, the indices passed - to the block always obey the original dimension order. + Note that foreach generated loop iterates over the stored elements in the storage + order. However, no matter what storage order is used, the indices passed to the block + always obey the original dimension order. 
For example: ```mlir @@ -941,10 +918,10 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", dimLevelType = [ "compressed", "compressed" ], dimOrdering = affine_map<(i,j) -> (j,i)> }> - + // foreach on a column-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #COL_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1] } @@ -954,25 +931,30 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", // foreach on a row-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #ROW_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1] } ``` + + Example: + + ```mlir + sparse_tensor.foreach in %0 : tensor do { + ^bb0(%arg1: index, %arg2: index, %arg3: f64): + do something... + } + ``` }]; let builders = [ - OpBuilder<(ins "Value":$tensor, - "function_ref")>, - OpBuilder<(ins "Value":$tensor, - "ValueRange":$iterArgs, - "function_ref")> + OpBuilder<( + ins "Value":$tensor, + "function_ref")> ]; - let regions = (region SizedRegion<1>:$region); - let assemblyFormat = "`in` $tensor (`init``(`$initArgs^`)`)? attr-dict" - " `:` type($tensor) (`,` type($initArgs)^)?" - " (`->` type($results)^)? 
`do` $region"; + let regions = (region AnyRegion:$region); + let assemblyFormat = "`in` $tensor attr-dict `:` type($tensor) `do` $region"; let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index bfd38e12ea36c..b0c88e161a50d 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -581,20 +581,11 @@ LogicalResult CompressOp::verify() { void ForeachOp::build( OpBuilder &builder, OperationState &result, Value tensor, - function_ref - bodyBuilder) { - build(builder, result, tensor, llvm::None, bodyBuilder); -} - -void ForeachOp::build( - OpBuilder &builder, OperationState &result, Value tensor, - ValueRange initArgs, - function_ref - bodyBuilder) { - build(builder, result, initArgs.getTypes(), tensor, initArgs); - // Builds foreach body. + function_ref bodyBuilder) { + build(builder, result, tensor); if (!bodyBuilder) return; + auto rtp = tensor.getType().cast(); int64_t rank = rtp.getRank(); @@ -611,38 +602,23 @@ void ForeachOp::build( auto ®ion = *result.regions.front(); Block *bodyBlock = builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs); - bodyBuilder(builder, result.location, - bodyBlock->getArguments().slice(0, rank), - bodyBlock->getArguments()[rank], - bodyBlock->getArguments().drop_front(rank + 1)); + bodyBuilder(builder, result.location, bodyBlock->getArguments()); } LogicalResult ForeachOp::verify() { auto t = getTensor().getType().cast(); auto args = getBody()->getArguments(); - if (static_cast(t.getRank()) + 1 + getInitArgs().size() != - args.size()) + if (static_cast(t.getRank()) + 1 != args.size()) return emitError("Unmatched number of arguments in the block"); - if (getNumResults() != getInitArgs().size()) - return emitError("Mismatch in number of init arguments and results"); - - if (getResultTypes() != getInitArgs().getTypes()) - return 
emitError("Mismatch in types of init arguments and results"); - - auto yield = cast(getBody()->getTerminator()); - if (yield.getNumOperands() != getNumResults() || - yield.getOperands().getTypes() != getResultTypes()) - return emitError("Mismatch in types of yield values and results"); - for (int64_t i = 0, e = t.getRank(); i < e; i++) if (args[i].getType() != IndexType::get(getContext())) emitError( llvm::formatv("Expecting Index type for argument at index {0}", i)); auto elemTp = t.getElementType(); - auto valueTp = args[t.getRank()].getType(); + auto valueTp = args.back().getType(); if (elemTp != valueTp) emitError(llvm::formatv("Unmatched element type between input tensor and " "block argument, expected:{0}, got: {1}", diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 7747fd73aa9bb..9c002f1ae0ec8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -357,9 +357,7 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { auto cooBuffer = rewriter.create(loc, cooTp, dstDynSizes).getResult(); rewriter.create( - loc, srcTensor, llvm::None, - [&](OpBuilder &builder, Location loc, ValueRange args, Value v, - ValueRange reduc) { + loc, srcTensor, [&](OpBuilder &builder, Location loc, ValueRange args) { SmallVector srcIndices; SmallVector dstIndices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { @@ -368,7 +366,7 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { } translateIndicesArray(builder, loc, op.getReassociationIndices(), srcIndices, srcSizes, dstSizes, dstIndices); - builder.create(loc, v, cooBuffer, dstIndices); + builder.create(loc, args.back(), cooBuffer, dstIndices); builder.create(loc); }); @@ -448,9 +446,7 @@ struct ConcatenateRewriter : public OpRewritePattern { // Build a for op for each input tensor to append new 
values into the // output tensor. rewriter.create( - loc, input, llvm::None, - [&](OpBuilder &builder, Location loc, ValueRange args, Value v, - ValueRange reduc) { + loc, input, [&](OpBuilder &builder, Location loc, ValueRange args) { SmallVector indices; for (int64_t i = 0; i < rank; i++) { uint64_t dim = @@ -461,7 +457,7 @@ struct ConcatenateRewriter : public OpRewritePattern { idx = builder.create(loc, idx, offset); indices.push_back(idx); } - builder.create(loc, v, cooBuffer, indices); + builder.create(loc, args.back(), cooBuffer, indices); builder.create(loc); }); // Accumulates the offset. Note that only static-shaped inputs are allowed @@ -562,13 +558,12 @@ struct ConvertRewriter : public OpRewritePattern { sizesForTensor(rewriter, sizes, loc, srcTp, src); Value dst = allocDenseTensor(rewriter, loc, dstTp, sizes); - rewriter.create(loc, src, llvm::None, - [&](OpBuilder &builder, Location loc, - ValueRange args, Value v, ValueRange reduc) { - builder.create(loc, v, dst, - args); - builder.create(loc); - }); + rewriter.create( + loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + builder.create(loc, args.back(), dst, + args.drop_back()); + builder.create(loc); + }); rewriter.replaceOpWithNewOp(op, dstTp, dst); return success(); @@ -603,15 +598,13 @@ struct ConvertRewriter : public OpRewritePattern { tmpCoo = rewriter.create(loc, srcTp, dynSrcSizes).getResult(); rewriter.create( - loc, src, llvm::None, - [&](OpBuilder &builder, Location loc, ValueRange args, Value v, - ValueRange reduc) { + loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { SmallVector indices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { uint64_t dim = toStoredDim(encSrc, i); indices.push_back(args[dim]); } - builder.create(loc, v, tmpCoo, indices); + builder.create(loc, args.back(), tmpCoo, indices); builder.create(loc); }); src = tmpCoo; @@ -653,18 +646,16 @@ struct ConvertRewriter : public OpRewritePattern { getDynamicSizes(dstTp, srcSizes, 
dynDstSizes); Value dst = rewriter.create(loc, dstTp, dynDstSizes).getResult(); - rewriter.create(loc, src, llvm::None, - [&](OpBuilder &builder, Location loc, - ValueRange args, Value v, ValueRange reduc) { - SmallVector indices; - for (int64_t i = 0, e = srcTp.getRank(); i < e; - i++) { - uint64_t dim = toStoredDim(encDst, i); - indices.push_back(args[dim]); - } - builder.create(loc, v, dst, indices); - builder.create(loc); - }); + rewriter.create( + loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + SmallVector indices; + for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { + uint64_t dim = toStoredDim(encDst, i); + indices.push_back(args[dim]); + } + builder.create(loc, args.back(), dst, indices); + builder.create(loc); + }); // Release the temporary COO if it is created. if (tmpCoo) @@ -875,14 +866,12 @@ struct OutRewriter : public OpRewritePattern { ModuleOp module = op->getParentOfType(); // For each element in the source tensor, output the element. rewriter.create( - loc, src, llvm::None, - [&](OpBuilder &builder, Location loc, ValueRange args, Value v, - ValueRange reduc) { + loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { for (uint64_t i = 0; i < rank; i++) { rewriter.create(loc, args[i], indices, constantIndex(builder, loc, i)); } - rewriter.create(loc, v, value); + rewriter.create(loc, args.back(), value); SmallVector operands{writer, rankValue, indices, value}; FlatSymbolRefAttr fn = getFunc(module, outNextFuncName, {}, operands, EmitCInterface::On); diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 02fb97bc866c6..dd27ce398c203 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -551,51 +551,6 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { // ----- -#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> -func.func @sparse_tensor_foreach(%arg0: 
tensor<2x4xf64, #DCSR>) -> () { - // expected-error@+1 {{Unmatched element type between input tensor and block argument}} - sparse_tensor.foreach in %arg0 : tensor<2x4xf64, #DCSR> do { - ^bb0(%1: index, %2: index, %v: f32) : - } - return -} - -// ----- - -#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> -func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { - // expected-error@+1 {{Mismatch in number of init arguments and results}} - sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 do { - ^bb0(%1: index, %2: index, %v: f32, %r1 : i32) : - } - return -} - -// ----- - -#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> -func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { - // expected-error@+1 {{Mismatch in types of init arguments and results}} - %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> i32 do { - ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : - } - return -} - -// ----- - -#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> -func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { - // expected-error@+1 {{Mismatch in types of yield values and results}} - %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> f32 do { - ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : - sparse_tensor.yield %1 : index - } - return -} - -// ----- - // TODO: a test case with empty xs doesn't work due to some parser issues. 
func.func @sparse_sort_x_type( %arg0: index, %arg1: memref) { diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index bc664ae3d2d00..0ef58db148525 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -411,26 +411,6 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { return } -// ----- - -#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> - -// CHECK-LABEL: func @sparse_tensor_foreach( -// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>, -// CHECK-SAME: %[[A1:.*]]: f32 -// CHECK-NEXT: %[[RET:.*]] = sparse_tensor.foreach in %[[A0]] init(%[[A1]]) -// CHECK-NEXT: ^bb0(%[[TMP_1:.*]]: index, %[[TMP_2:.*]]: index, %[[TMP_v:.*]]: f64, %[[TMP_r:.*]]: f32) -// CHECK: sparse_tensor.yield %[[TMP_r]] : f32 -// CHECK: } -func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { - %ret = sparse_tensor.foreach in %arg0 init(%arg1): tensor<2x4xf64, #DCSR>, f32 -> f32 - do { - ^bb0(%1: index, %2: index, %v: f64, %r: f32) : - sparse_tensor.yield %r : f32 - } - return -} - // ---- // CHECK-LABEL: func @sparse_sort_1d0v( From b8651a171733f86074767d6240c4fc694cdff7ad Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 7 Nov 2022 07:36:11 +0000 Subject: [PATCH 450/516] [AMDGPU] Merge GlobalISel tests into SelectionDAG tests. NFC. Remove GlobalISel test files that only contained RUN lines running the code from the SelectionDAG version of the same test. 
Differential Revision: https://reviews.llvm.org/D137533 --- .../CodeGen/AMDGPU/GlobalISel/lds-size.ll | 1 - .../GlobalISel/llvm.amdgcn.ds.append.ll | 4 --- .../GlobalISel/llvm.amdgcn.ds.consume.ll | 4 --- .../GlobalISel/llvm.amdgcn.ds.gws.barrier.ll | 10 ------- .../GlobalISel/llvm.amdgcn.ds.gws.init.ll | 6 ----- .../GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll | 6 ----- .../GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll | 6 ----- .../llvm.amdgcn.ds.ordered.add.gfx10.ll | 2 -- .../GlobalISel/llvm.amdgcn.ds.ordered.add.ll | 4 --- .../GlobalISel/llvm.amdgcn.ds.ordered.swap.ll | 4 --- .../GlobalISel/llvm.amdgcn.init.exec.ll | 3 --- .../llvm.amdgcn.init.exec.wave32.ll | 5 ---- .../AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll | 2 -- .../AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll | 4 --- .../CodeGen/AMDGPU/GlobalISel/llvm.trap.ll | 16 ----------- .../AMDGPU/GlobalISel/read_register.ll | 2 -- .../AMDGPU/GlobalISel/readcyclecounter.ll | 5 ---- llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll | 3 --- .../AMDGPU/GlobalISel/write_register.ll | 2 -- llvm/test/CodeGen/AMDGPU/lds-size.ll | 3 ++- .../CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll | 12 ++++++--- .../CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll | 22 +++++++++------ .../AMDGPU/llvm.amdgcn.ds.gws.barrier.ll | 24 +++++++++++------ .../CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll | 18 ++++++++----- .../AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll | 18 ++++++++----- .../AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll | 18 ++++++++----- .../llvm.amdgcn.ds.ordered.add.gfx10.ll | 6 +++-- .../AMDGPU/llvm.amdgcn.ds.ordered.add.ll | 12 ++++++--- .../AMDGPU/llvm.amdgcn.ds.ordered.swap.ll | 12 ++++++--- .../CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll | 9 ++++--- .../AMDGPU/llvm.amdgcn.init.exec.wave32.ll | 12 ++++++--- .../CodeGen/AMDGPU/llvm.amdgcn.permlane.ll | 6 +++-- .../CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll | 9 ++++--- llvm/test/CodeGen/AMDGPU/read_register.ll | 3 ++- llvm/test/CodeGen/AMDGPU/readcyclecounter.ll | 15 +++++++---- llvm/test/CodeGen/AMDGPU/ret.ll | 6 +++-- 
llvm/test/CodeGen/AMDGPU/trap.ll | 27 ++++++++++++------- llvm/test/CodeGen/AMDGPU/write_register.ll | 3 ++- 38 files changed, 156 insertions(+), 168 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/write_register.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll deleted file mode 100644 index 208500c28b4b3..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll +++ /dev/null @@ -1 +0,0 @@ -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 
< %S/../lds-size.ll | FileCheck -check-prefix=ALL -check-prefix=HSA %S/../lds-size.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll deleted file mode 100644 index 1b09c62519127..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll +++ /dev/null @@ -1,4 +0,0 @@ -; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll deleted file mode 100644 index 7aea170ed1ef8..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll +++ /dev/null @@ -1,4 +0,0 @@ -; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga 
-mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %S/../llvm.amdgcn.ds.append.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll deleted file mode 100644 index 449a8ab04ba03..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll +++ /dev/null @@ -1,10 +0,0 @@ -; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope 
-check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll - -; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos. -; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll deleted file mode 100644 index 3dceb31d92721..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll +++ /dev/null @@ -1,6 +0,0 @@ -; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < 
%S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll deleted file mode 100644 index ada1267253714..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP 
%S/../llvm.amdgcn.ds.gws.sema.br.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll deleted file mode 100644 index 20725da516790..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll deleted file mode 100644 index ba67a6e6365e4..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll +++ /dev/null @@ -1,2 +0,0 @@ -; 
RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll deleted file mode 100644 index 4193d976afd65..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll deleted file mode 100644 index e2c3b625395a7..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll -; RUN: llc 
-global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll deleted file mode 100644 index 9a6bebd5a31c6..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll -; RUN: llc -march=amdgcn -mcpu=gfx1010 -global-isel -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll -; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel -amdgpu-enable-delay-alu=0 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll deleted file mode 100644 index d6d20abcd8aa2..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll +++ /dev/null @@ -1,5 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < 
%S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1064 %S/../llvm.amdgcn.init.exec.wave32.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1064 %S/../llvm.amdgcn.init.exec.wave32.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll deleted file mode 100644 index e644b907824ad..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll +++ /dev/null @@ -1,2 +0,0 @@ -; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %S/../llvm.amdgcn.permlane.ll -; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %S/../llvm.amdgcn.permlane.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll deleted file mode 100644 index 715f3820787fb..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll +++ /dev/null @@ -1,4 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck 
-enable-var-scope -check-prefixes=CHECK,WAVE64 %S/../llvm.amdgcn.wqm.vote.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %S/../llvm.amdgcn.wqm.vote.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %S/../llvm.amdgcn.wqm.vote.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll deleted file mode 100644 index f1ff3825c5faf..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll +++ /dev/null @@ -1,16 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP -enable-var-scope %S/../trap.ll - -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -enable-var-scope %S/../trap.ll - -; enable trap handler feature -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel 
-mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT -enable-var-scope %S/../trap.ll - -; disable trap handler feature -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT -enable-var-scope %S/../trap.ll - -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -enable-var-scope %S/../trap.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll deleted file mode 100644 index 3bd16996f8e33..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll +++ /dev/null @@ -1,2 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %S/../read_register.ll | FileCheck -enable-var-scope %S/../read_register.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll deleted file mode 100644 index 8d9ab9cada75e..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll +++ /dev/null @@ -1,5 +0,0 @@ -; SI run line skipped since store not yet implemented. 
-; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %S/../readcyclecounter.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=GCN %S/../readcyclecounter.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %S/../readcyclecounter.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %S/../readcyclecounter.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll deleted file mode 100644 index c6c1a87177fff..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll +++ /dev/null @@ -1,3 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/write_register.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/write_register.ll deleted file mode 100644 index 865b4c1817cdc..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/write_register.ll +++ /dev/null @@ -1,2 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %S/../write_register.ll | FileCheck -enable-var-scope %S/../write_register.ll diff --git a/llvm/test/CodeGen/AMDGPU/lds-size.ll b/llvm/test/CodeGen/AMDGPU/lds-size.ll index 
4a94a95f081bc..313e4d0e07426 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-size.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-size.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s ; This test makes sure we do not double count global values when they are diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll index 2da96c4480608..3005437edd73e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s +; XUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn 
-mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL %s ; GCN-LABEL: {{^}}ds_append_lds: ; GCN: s_load_dword [[PTR:s[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll index 40f20bc795222..59c6549ad6ad9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s +; XUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn 
-mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,GCN-GISEL %s ; GCN-LABEL: {{^}}ds_consume_lds: ; GCN: s_load_dword [[PTR:s[0-9]+]] @@ -52,9 +56,10 @@ define amdgpu_kernel void @ds_consume_no_fold_offset_si(i32 addrspace(3)* addrsp ; GCN: s_load_dword [[PTR:s[0-9]+]] ; SI: s_bitset1_b32 [[PTR]], 16 -; CIPLUS: s_add_i32 [[PTR]], [[PTR]], 0x10000 +; CIPLUS-SDAG: s_add_i32 [[PTR]], [[PTR]], 0x10000 +; CIPLUS-GISEL: s_add_u32 [[PTR]], [[PTR]], 0x10000 -; GCN: s_mov_b32 m0, [[PTR]] +; GCN-SDAG: s_mov_b32 m0, [[PTR]] ; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] @@ -66,8 +71,9 @@ define amdgpu_kernel void @ds_consume_lds_over_max_offset(i32 addrspace(3)* %lds } ; GCN-LABEL: {{^}}ds_consume_lds_vgpr_addr: -; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 -; GCN: s_mov_b32 m0, [[READLANE]] +; GCN-SDAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 +; GCN-SDAG: s_mov_b32 m0, [[READLANE]] +; GCN-GISEL: v_readfirstlane_b32 m0, v0 ; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll index 8ca8148e88206..d7a4ba9dc5eb2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -1,13 +1,21 @@ -; 
RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | 
FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %s ; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos. -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s ; Minimum offset diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll index f656af44746fb..f87a3eaad63a9 100644 
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll @@ -1,9 +1,15 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s ; Minimum offset ; GCN-LABEL: {{^}}gws_init_offset0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll index 18f187a0bb71b..da64f7350a921 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll @@ -1,9 +1,15 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s ; GCN-LABEL: {{^}}gws_sema_br_offset0: ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll index 14c6a478d8a52..215c394409ac0 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll @@ -1,9 +1,15 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d 
-mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s ; GCN-LABEL: {{^}}gws_sema_v_offset0: ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll index 4296455a018db..6a9d10fbfb3da 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll @@ -1,5 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}ds_ordered_add: ; GCN-DAG: v_{{(dual_)?}}mov_b32{{(_e32)?}} v[[INCR:[0-9]+]], 31 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll index 
4009c5a63449f..76bd2270a47bf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s ; FUNC-LABEL: {{^}}ds_ordered_add: ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll index 79933d2159d82..76266919b5ac9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll @@ -1,7 +1,11 @@ -; RUN: llc 
-march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s ; FUNC-LABEL: {{^}}ds_ordered_swap: ; GCN: s_mov_b32 m0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll index aaf73ea8d6c99..f7f1f96f4d3da 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll @@ -1,6 +1,9 @@ -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; 
RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}full_mask: ; GCN: s_mov_b64 exec, -1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll index 1355274ae9ead..4098b2b337232 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < 
%s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s ; GCN-LABEL: {{^}}test_init_exec: ; GFX1032: s_mov_b32 exec_lo, 0x12345 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll index 6b233f9a59e5f..ca48ce8a08c4a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -1,5 +1,7 @@ -; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s -; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GCN,GFX10PLUS,GFX11 %s +; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s +; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s +; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s +; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #1 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll index 768165368fb03..182275b687a68 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll @@ -1,6 +1,9 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE64 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE64 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE64 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s 
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s ;CHECK-LABEL: {{^}}ret: ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/read_register.ll b/llvm/test/CodeGen/AMDGPU/read_register.ll index 8fd2fb05cea22..8e8fc44bf57df 100644 --- a/llvm/test/CodeGen/AMDGPU/read_register.ll +++ b/llvm/test/CodeGen/AMDGPU/read_register.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.read_register.i32(metadata) #0 declare i64 @llvm.read_register.i64(metadata) #0 diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll index f339bd86ea5dd..79ba68b339091 100644 --- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll +++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -1,8 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -enable-var-scope 
-check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s +; -global-isel=1 SI run line skipped since store not yet implemented. +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s declare i64 @llvm.readcyclecounter() #0 diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll index cee224bfa3899..97c07a3d7773f 100644 --- a/llvm/test/CodeGen/AMDGPU/ret.ll +++ b/llvm/test/CodeGen/AMDGPU/ret.ll @@ -1,5 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 
-march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}vgpr: ; GCN-DAG: v_mov_b32_e32 v1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll index 64f4064610468..77b8b47d1f0b0 100644 --- a/llvm/test/CodeGen/AMDGPU/trap.ll +++ b/llvm/test/CodeGen/AMDGPU/trap.ll @@ -1,18 +1,27 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; enable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck 
-check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s ; disable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported diff --git a/llvm/test/CodeGen/AMDGPU/write_register.ll b/llvm/test/CodeGen/AMDGPU/write_register.ll index eb9b103775a81..d385425c3e291 100644 --- a/llvm/test/CodeGen/AMDGPU/write_register.ll 
+++ b/llvm/test/CodeGen/AMDGPU/write_register.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s declare void @llvm.write_register.i32(metadata, i32) #0 declare void @llvm.write_register.i64(metadata, i64) #0 From f81f880871e04ef0284af14a141a58905e81cdd9 Mon Sep 17 00:00:00 2001 From: yijiagu Date: Mon, 7 Nov 2022 09:47:53 -0800 Subject: [PATCH 451/516] [mlir] Lower async.func with async.coro and async.runtime operations Lower async.func with async.coro and async.runtime operations - This patch modifies AsyncToAsyncRuntime pass to add lowering async.func ops with coroutine cfg. Example: ``` async.func @foo() -> !async.value { %cst = arith.constant 42.0 : f32 return %cst: f32 } ``` After lowering: ``` func.func @foo() -> !async.value attributes {passthrough = ["presplitcoroutine"]} { %0 = async.runtime.create : !async.value %1 = async.coro.id %2 = async.coro.begin %1 cf.br ^bb1 ^bb1: // pred: ^bb0 %cst = arith.constant 4.200000e+01 : f32 async.runtime.store %cst, %0 : async.runtime.set_available %0 : !async.value cf.br ^bb2 ^bb2: // pred: ^bb1 async.coro.free %1, %2 cf.br ^bb3 ^bb3: // pred: ^bb2 async.coro.end %2 return %0 : !async.value } ``` Reviewed By: ezhulenev Differential Revision: https://reviews.llvm.org/D137462 --- .../Async/Transforms/AsyncToAsyncRuntime.cpp | 270 +++++++++++++----- .../Dialect/Async/async-to-async-runtime.mlir | 22 ++ mlir/test/mlir-cpu-runner/async-func.mlir | 149 ++++++++++ 3 files changed, 366 insertions(+), 75 deletions(-) create mode 100644 mlir/test/mlir-cpu-runner/async-func.mlir diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp index 
38f3717c70f9b..66c5b731b6e76 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp @@ -51,10 +51,6 @@ class AsyncToAsyncRuntimePass } // namespace -//===----------------------------------------------------------------------===// -// async.execute op outlining to the coroutine functions. -//===----------------------------------------------------------------------===// - /// Function targeted for coroutine transformation has two additional blocks at /// the end: coroutine cleanup and coroutine suspension. /// @@ -64,6 +60,12 @@ namespace { struct CoroMachinery { func::FuncOp func; + // Async function returns an optional token, followed by some async values + // + // async.func @foo() -> !async.value { + // %cst = arith.constant 42.0 : T + // return %cst: T + // } // Async execute region returns a completion token, and an async value for // each yielded value. // @@ -71,12 +73,12 @@ struct CoroMachinery { // %0 = arith.constant ... : T // async.yield %0 : T // } - Value asyncToken; // token representing completion of the async region + Optional asyncToken; // returned completion token llvm::SmallVector returnValues; // returned async values Value coroHandle; // coroutine handle (!async.coro.getHandle value) Block *entry; // coroutine entry block - Block *setError; // switch completion token and all values to error state + Optional setError; // set returned values to error state Block *cleanup; // coroutine cleanup block Block *suspend; // coroutine suspension block }; @@ -87,13 +89,9 @@ struct CoroMachinery { /// `async.runtime.*` and `async.coro.*` operations. Adds a new entry block /// that branches into preexisting entry block. Also inserts trailing blocks. /// -/// The result types of the passed `func` must start with an `async.token` +/// The result types of the passed `func` start with an optional `async.token` /// and be continued with some number of `async.value`s. 
/// -/// The func given to this function needs to have been preprocessed to have -/// either branch or yield ops as terminators. Branches to the cleanup block are -/// inserted after each yield. -/// /// See LLVM coroutines documentation: https://llvm.org/docs/Coroutines.html /// /// - `entry` block sets up the coroutine. @@ -110,7 +108,7 @@ struct CoroMachinery { /// ^entry(): /// %token = : !async.token // create async runtime token /// %value = : !async.value // create async value -/// %id = async.coro.getId // create a coroutine id +/// %id = async.coro.getId // create a coroutine id /// %hdl = async.coro.begin %id // create a coroutine handle /// cf.br ^preexisting_entry_block /// @@ -142,11 +140,20 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { // ------------------------------------------------------------------------ // // Allocate async token/values that we will return from a ramp function. // ------------------------------------------------------------------------ // - auto retToken = - builder.create(TokenType::get(ctx)).getResult(); + + // We treat TokenType as state update marker to represent side-effects of + // async computations + bool isStateful = func.getCallableResults().front().isa(); + + Optional retToken; + if (isStateful) + retToken.emplace(builder.create(TokenType::get(ctx))); llvm::SmallVector retValues; - for (auto resType : func.getCallableResults().drop_front()) + ArrayRef resValueTypes = isStateful + ? func.getCallableResults().drop_front() + : func.getCallableResults(); + for (auto resType : resValueTypes) retValues.emplace_back( builder.create(resType).getResult()); @@ -179,26 +186,17 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { // Mark the end of a coroutine: async.coro.end builder.create(coroHdlOp.getHandle()); - // Return created `async.token` and `async.values` from the suspend block. - // This will be the return value of a coroutine ramp function. 
- SmallVector ret{retToken}; + // Return created optional `async.token` and `async.values` from the suspend + // block. This will be the return value of a coroutine ramp function. + SmallVector ret; + if (retToken) + ret.push_back(*retToken); ret.insert(ret.end(), retValues.begin(), retValues.end()); builder.create(ret); // `async.await` op lowering will create resume blocks for async // continuations, and will conditionally branch to cleanup or suspend blocks. - for (Block &block : func.getBody().getBlocks()) { - if (&block == entryBlock || &block == cleanupBlock || - &block == suspendBlock) - continue; - Operation *terminator = block.getTerminator(); - if (auto yield = dyn_cast(terminator)) { - builder.setInsertionPointToEnd(&block); - builder.create(cleanupBlock); - } - } - // The switch-resumed API based coroutine should be marked with // coroutine.presplit attribute to mark the function as a coroutine. func->setAttr("passthrough", builder.getArrayAttr( @@ -210,7 +208,7 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { machinery.returnValues = retValues; machinery.coroHandle = coroHdlOp.getHandle(); machinery.entry = entryBlock; - machinery.setError = nullptr; // created lazily only if needed + machinery.setError = None; // created lazily only if needed machinery.cleanup = cleanupBlock; machinery.suspend = suspendBlock; return machinery; @@ -220,25 +218,31 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { // runtime operations (see for example lowering of assert operation). 
static Block *setupSetErrorBlock(CoroMachinery &coro) { if (coro.setError) - return coro.setError; + return *coro.setError; coro.setError = coro.func.addBlock(); - coro.setError->moveBefore(coro.cleanup); + (*coro.setError)->moveBefore(coro.cleanup); auto builder = - ImplicitLocOpBuilder::atBlockBegin(coro.func->getLoc(), coro.setError); + ImplicitLocOpBuilder::atBlockBegin(coro.func->getLoc(), *coro.setError); // Coroutine set_error block: set error on token and all returned values. - builder.create(coro.asyncToken); + if (coro.asyncToken) + builder.create(*coro.asyncToken); + for (Value retValue : coro.returnValues) builder.create(retValue); // Branch into the cleanup block. builder.create(coro.cleanup); - return coro.setError; + return *coro.setError; } +//===----------------------------------------------------------------------===// +// async.execute op outlining to the coroutine functions. +//===----------------------------------------------------------------------===// + /// Outline the body region attached to the `async.execute` op into a standalone /// function. 
/// @@ -382,6 +386,118 @@ class AddToGroupOpLowering : public OpConversionPattern { }; } // namespace +//===----------------------------------------------------------------------===// +// Convert async.func, async.return and async.call operations to non-blocking +// operations based on llvm coroutine +//===----------------------------------------------------------------------===// + +namespace { + +//===----------------------------------------------------------------------===// +// Convert async.func operation to func.func +//===----------------------------------------------------------------------===// + +class AsyncFuncOpLowering : public OpConversionPattern { +public: + AsyncFuncOpLowering(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} + + LogicalResult + matchAndRewrite(async::FuncOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); + + auto newFuncOp = + rewriter.create(loc, op.getName(), op.getFunctionType()); + + SymbolTable::setSymbolVisibility(newFuncOp, + SymbolTable::getSymbolVisibility(op)); + // Copy over all attributes other than the name. 
+ for (const auto &namedAttr : op->getAttrs()) { + if (namedAttr.getName() != SymbolTable::getSymbolAttrName()) + newFuncOp->setAttr(namedAttr.getName(), namedAttr.getValue()); + } + + rewriter.inlineRegionBefore(op.getBody(), newFuncOp.getBody(), + newFuncOp.end()); + + CoroMachinery coro = setupCoroMachinery(newFuncOp); + coros[newFuncOp] = coro; + // no initial suspend, we should hot-start + + rewriter.eraseOp(op); + return success(); + } + +private: + llvm::DenseMap &coros; +}; + +//===----------------------------------------------------------------------===// +// Convert async.call operation to func.call +//===----------------------------------------------------------------------===// + +class AsyncCallOpLowering : public OpConversionPattern { +public: + AsyncCallOpLowering(MLIRContext *ctx) + : OpConversionPattern(ctx) {} + + LogicalResult + matchAndRewrite(async::CallOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, op.getCallee(), op.getResultTypes(), op.getOperands()); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// Convert async.return operation to async.runtime operations. 
+//===----------------------------------------------------------------------===// + +class AsyncReturnOpLowering : public OpConversionPattern { +public: + AsyncReturnOpLowering(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} + + LogicalResult + matchAndRewrite(async::ReturnOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto func = op->template getParentOfType(); + auto funcCoro = coros.find(func); + if (funcCoro == coros.end()) + return rewriter.notifyMatchFailure( + op, "operation is not inside the async coroutine function"); + + Location loc = op->getLoc(); + const CoroMachinery &coro = funcCoro->getSecond(); + rewriter.setInsertionPointAfter(op); + + // Store return values into the async values storage and switch async + // values state to available. + for (auto tuple : llvm::zip(adaptor.getOperands(), coro.returnValues)) { + Value returnValue = std::get<0>(tuple); + Value asyncValue = std::get<1>(tuple); + rewriter.create(loc, returnValue, asyncValue); + rewriter.create(loc, asyncValue); + } + + if (coro.asyncToken) + // Switch the coroutine completion token to available state. + rewriter.create(loc, *coro.asyncToken); + + rewriter.eraseOp(op); + rewriter.create(loc, coro.cleanup); + return success(); + } + +private: + llvm::DenseMap &coros; +}; +} // namespace + //===----------------------------------------------------------------------===// // Convert async.await and async.await_all operations to the async.runtime.await // or async.runtime.await_and_resume operations. 
@@ -393,11 +509,9 @@ class AwaitOpLoweringBase : public OpConversionPattern { using AwaitAdaptor = typename AwaitType::Adaptor; public: - AwaitOpLoweringBase( - MLIRContext *ctx, - llvm::DenseMap &outlinedFunctions) - : OpConversionPattern(ctx), - outlinedFunctions(outlinedFunctions) {} + AwaitOpLoweringBase(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} LogicalResult matchAndRewrite(AwaitType op, typename AwaitType::Adaptor adaptor, @@ -409,8 +523,8 @@ class AwaitOpLoweringBase : public OpConversionPattern { // Check if await operation is inside the outlined coroutine function. auto func = op->template getParentOfType(); - auto outlined = outlinedFunctions.find(func); - const bool isInCoroutine = outlined != outlinedFunctions.end(); + auto funcCoro = coros.find(func); + const bool isInCoroutine = funcCoro != coros.end(); Location loc = op->getLoc(); Value operand = adaptor.getOperand(); @@ -436,7 +550,7 @@ class AwaitOpLoweringBase : public OpConversionPattern { // Inside the coroutine we convert await operation into coroutine suspension // point, and resume execution asynchronously. if (isInCoroutine) { - CoroMachinery &coro = outlined->getSecond(); + CoroMachinery &coro = funcCoro->getSecond(); Block *suspended = op->getBlock(); ImplicitLocOpBuilder builder(loc, op, rewriter.getListener()); @@ -488,7 +602,7 @@ class AwaitOpLoweringBase : public OpConversionPattern { } private: - llvm::DenseMap &outlinedFunctions; + llvm::DenseMap &coros; }; /// Lowering for `async.await` with a token operand. 
@@ -531,24 +645,22 @@ class AwaitAllOpLowering : public AwaitOpLoweringBase { class YieldOpLowering : public OpConversionPattern { public: - YieldOpLowering( - MLIRContext *ctx, - const llvm::DenseMap &outlinedFunctions) - : OpConversionPattern(ctx), - outlinedFunctions(outlinedFunctions) {} + YieldOpLowering(MLIRContext *ctx, + const llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} LogicalResult matchAndRewrite(async::YieldOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Check if yield operation is inside the async coroutine function. auto func = op->template getParentOfType(); - auto outlined = outlinedFunctions.find(func); - if (outlined == outlinedFunctions.end()) + auto funcCoro = coros.find(func); + if (funcCoro == coros.end()) return rewriter.notifyMatchFailure( op, "operation is not inside the async coroutine function"); Location loc = op->getLoc(); - const CoroMachinery &coro = outlined->getSecond(); + const CoroMachinery &coro = funcCoro->getSecond(); // Store yielded values into the async values storage and switch async // values state to available. @@ -559,14 +671,18 @@ class YieldOpLowering : public OpConversionPattern { rewriter.create(loc, asyncValue); } - // Switch the coroutine completion token to available state. - rewriter.replaceOpWithNewOp(op, coro.asyncToken); + if (coro.asyncToken) + // Switch the coroutine completion token to available state. 
+ rewriter.create(loc, *coro.asyncToken); + + rewriter.eraseOp(op); + rewriter.create(loc, coro.cleanup); return success(); } private: - const llvm::DenseMap &outlinedFunctions; + const llvm::DenseMap &coros; }; //===----------------------------------------------------------------------===// @@ -575,24 +691,22 @@ class YieldOpLowering : public OpConversionPattern { class AssertOpLowering : public OpConversionPattern { public: - AssertOpLowering( - MLIRContext *ctx, - llvm::DenseMap &outlinedFunctions) - : OpConversionPattern(ctx), - outlinedFunctions(outlinedFunctions) {} + AssertOpLowering(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} LogicalResult matchAndRewrite(cf::AssertOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Check if assert operation is inside the async coroutine function. auto func = op->template getParentOfType(); - auto outlined = outlinedFunctions.find(func); - if (outlined == outlinedFunctions.end()) + auto funcCoro = coros.find(func); + if (funcCoro == coros.end()) return rewriter.notifyMatchFailure( op, "operation is not inside the async coroutine function"); Location loc = op->getLoc(); - CoroMachinery &coro = outlined->getSecond(); + CoroMachinery &coro = funcCoro->getSecond(); Block *cont = rewriter.splitBlock(op->getBlock(), Block::iterator(op)); rewriter.setInsertionPointToEnd(cont->getPrevNode()); @@ -607,7 +721,7 @@ class AssertOpLowering : public OpConversionPattern { } private: - llvm::DenseMap &outlinedFunctions; + llvm::DenseMap &coros; }; //===----------------------------------------------------------------------===// @@ -615,22 +729,23 @@ void AsyncToAsyncRuntimePass::runOnOperation() { ModuleOp module = getOperation(); SymbolTable symbolTable(module); - // Outline all `async.execute` body regions into async functions (coroutines). 
- llvm::DenseMap outlinedFunctions; + // Functions with coroutine CFG setups, which are results of outlining + // `async.execute` body regions and converting async.func. + llvm::DenseMap coros; module.walk([&](ExecuteOp execute) { - outlinedFunctions.insert(outlineExecuteOp(symbolTable, execute)); + coros.insert(outlineExecuteOp(symbolTable, execute)); }); LLVM_DEBUG({ - llvm::dbgs() << "Outlined " << outlinedFunctions.size() + llvm::dbgs() << "Outlined " << coros.size() << " functions built from async.execute operations\n"; }); // Returns true if operation is inside the coroutine. auto isInCoroutine = [&](Operation *op) -> bool { auto parentFunc = op->getParentOfType(); - return outlinedFunctions.find(parentFunc) != outlinedFunctions.end(); + return coros.find(parentFunc) != coros.end(); }; // Lower async operations to async.runtime operations. @@ -646,18 +761,23 @@ void AsyncToAsyncRuntimePass::runOnOperation() { // Async lowering does not use type converter because it must preserve all // types for async.runtime operations. asyncPatterns.add(ctx); + + // Lower async.func to func.func with coroutine cfg. + asyncPatterns.add(ctx); + asyncPatterns.add(ctx, coros); + asyncPatterns.add(ctx, - outlinedFunctions); + AwaitAllOpLowering, YieldOpLowering>(ctx, coros); // Lower assertions to conditional branches into error blocks. - asyncPatterns.add(ctx, outlinedFunctions); + asyncPatterns.add(ctx, coros); // All high level async operations must be lowered to the runtime operations. ConversionTarget runtimeTarget(*ctx); - runtimeTarget.addLegalDialect(); + runtimeTarget.addLegalDialect(); runtimeTarget.addIllegalOp(); - runtimeTarget.addIllegalOp(); + runtimeTarget.addIllegalOp(); // Decide if structured control flow has to be lowered to branch-based CFG. 
runtimeTarget.addDynamicallyLegalDialect([&](Operation *op) { @@ -675,7 +795,7 @@ void AsyncToAsyncRuntimePass::runOnOperation() { runtimeTarget.addDynamicallyLegalOp( [&](cf::AssertOp op) -> bool { auto func = op->getParentOfType(); - return outlinedFunctions.find(func) == outlinedFunctions.end(); + return coros.find(func) == coros.end(); }); if (failed(applyPartialConversion(module, runtimeTarget, diff --git a/mlir/test/Dialect/Async/async-to-async-runtime.mlir b/mlir/test/Dialect/Async/async-to-async-runtime.mlir index d7ebfb9e77926..1551e55c90c08 100644 --- a/mlir/test/Dialect/Async/async-to-async-runtime.mlir +++ b/mlir/test/Dialect/Async/async-to-async-runtime.mlir @@ -433,3 +433,25 @@ func.func @clone_constants(%arg0: f32, %arg1: memref<1xf32>) { // CHECK-SAME: ) -> !async.token // CHECK: %[[CST:.*]] = arith.constant 0 : index // CHECK: memref.store %[[VALUE]], %[[MEMREF]][%[[CST]]] + +// ----- +// Async Functions should be none blocking + +// CHECK-LABEL: @async_func_await +async.func @async_func_await(%arg0: f32, %arg1: !async.value) + -> !async.token { + %0 = async.await %arg1 : !async.value + return +} +// Create token for return op, and mark a function as a coroutine. 
+// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token +// CHECK: %[[ID:.*]] = async.coro.id +// CHECK: %[[HDL:.*]] = async.coro.begin +// CHECK: cf.br ^[[ORIGIN_ENTRY:.*]] + +// CHECK: ^[[ORIGIN_ENTRY]]: +// CHECK: %[[SAVED:.*]] = async.coro.save %[[HDL]] +// CHECK: async.runtime.await_and_resume %[[arg1:.*]], %[[HDL]] : +// CHECK-SAME: !async.value +// CHECK: async.coro.suspend %[[SAVED]] +// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]] diff --git a/mlir/test/mlir-cpu-runner/async-func.mlir b/mlir/test/mlir-cpu-runner/async-func.mlir new file mode 100644 index 0000000000000..8b3d728d4667f --- /dev/null +++ b/mlir/test/mlir-cpu-runner/async-func.mlir @@ -0,0 +1,149 @@ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ +// RUN: | mlir-cpu-runner \ +// RUN: -e main -entry-point-result=void -O0 \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_async_runtime%shlibext \ +// RUN: | FileCheck %s --dump-input=always + +// FIXME: https://github.com/llvm/llvm-project/issues/57231 +// UNSUPPORTED: hwasan + +async.func @async_func_empty() -> !async.token { + return +} + +async.func @async_func_assert() -> !async.token { + %false = arith.constant 0 : i1 + cf.assert %false, "error" + return +} + +async.func @async_func_nested_assert() -> !async.token { + %token0 = async.call @async_func_assert() : () -> !async.token + async.await %token0 : !async.token + return +} + +async.func @async_func_value_assert() -> !async.value { + %false = arith.constant 0 : i1 + cf.assert %false, "error" + %0 = arith.constant 123.45 : f32 + return %0 : 
f32 +} + +async.func @async_func_value_nested_assert() -> !async.value { + %value0 = async.call @async_func_value_assert() : () -> !async.value + %ret = async.await %value0 : !async.value + return %ret : f32 +} + +async.func @async_func_return_value() -> !async.value { + %0 = arith.constant 456.789 : f32 + return %0 : f32 +} + +async.func @async_func_non_blocking_await() -> !async.value { + %value0 = async.call @async_func_return_value() : () -> !async.value + %1 = async.await %value0 : !async.value + return %1 : f32 +} + +async.func @async_func_inside_memref() -> !async.value> { + %0 = memref.alloc() : memref + %c0 = arith.constant 0.25 : f32 + memref.store %c0, %0[] : memref + return %0 : memref +} + +async.func @async_func_passed_memref(%arg0 : !async.value>) -> !async.token { + %unwrapped = async.await %arg0 : !async.value> + %0 = memref.load %unwrapped[] : memref + %1 = arith.addf %0, %0 : f32 + memref.store %1, %unwrapped[] : memref + return +} + + +func.func @main() { + %false = arith.constant 0 : i1 + + // ------------------------------------------------------------------------ // + // Check that simple async.func completes without errors. + // ------------------------------------------------------------------------ // + %token0 = async.call @async_func_empty() : () -> !async.token + async.runtime.await %token0 : !async.token + + // CHECK: 0 + %err0 = async.runtime.is_error %token0 : !async.token + vector.print %err0 : i1 + + // ------------------------------------------------------------------------ // + // Check that assertion in the async.func converted to async error. 
+ // ------------------------------------------------------------------------ // + %token1 = async.call @async_func_assert() : () -> !async.token + async.runtime.await %token1 : !async.token + + // CHECK: 1 + %err1 = async.runtime.is_error %token1 : !async.token + vector.print %err1 : i1 + + // ------------------------------------------------------------------------ // + // Check error propagation from the nested async.func. + // ------------------------------------------------------------------------ // + %token2 = async.call @async_func_nested_assert() : () -> !async.token + async.runtime.await %token2 : !async.token + + // CHECK: 1 + %err2 = async.runtime.is_error %token2 : !async.token + vector.print %err2 : i1 + + // ------------------------------------------------------------------------ // + // Check error propagation from the nested async.func with async values. + // ------------------------------------------------------------------------ // + %value3 = async.call @async_func_value_nested_assert() : () -> !async.value + async.runtime.await %value3 : !async.value + + // CHECK: 1 + %err3_0 = async.runtime.is_error %value3 : !async.value + vector.print %err3_0 : i1 + + // ------------------------------------------------------------------------ // + // Non-blocking async.await inside the async.func + // ------------------------------------------------------------------------ // + %result0 = async.call @async_func_non_blocking_await() : () -> !async.value + %4 = async.await %result0 : !async.value + + // CHECK: 456.789 + vector.print %4 : f32 + + // ------------------------------------------------------------------------ // + // Memref allocated inside async.func. 
+ // ------------------------------------------------------------------------ // + %result1 = async.call @async_func_inside_memref() : () -> !async.value> + %5 = async.await %result1 : !async.value> + %6 = memref.cast %5 : memref to memref<*xf32> + + // CHECK: Unranked Memref + // CHECK-SAME: rank = 0 offset = 0 sizes = [] strides = [] + // CHECK-NEXT: [0.25] + call @printMemrefF32(%6) : (memref<*xf32>) -> () + + // ------------------------------------------------------------------------ // + // Memref passed as async.func parameter + // ------------------------------------------------------------------------ // + %token3 = async.call @async_func_passed_memref(%result1) : (!async.value>) -> !async.token + async.await %token3 : !async.token + + // CHECK: Unranked Memref + // CHECK-SAME: rank = 0 offset = 0 sizes = [] strides = [] + // CHECK-NEXT: [0.5] + call @printMemrefF32(%6) : (memref<*xf32>) -> () + + memref.dealloc %5 : memref + + return +} + +func.func private @printMemrefF32(memref<*xf32>) + attributes { llvm.emit_c_interface } From c4b74658c7875cf2ac322956c2116c5fbea56158 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 7 Nov 2022 09:54:10 -0800 Subject: [PATCH 452/516] [mlir] Fix a warning (NFC) This patch fixes: mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp:717:48: error: comparison of integers of different signs: 'int64_t' (aka 'long') and 'uint64_t' (aka 'unsigned long') [-Werror,-Wsign-compare] --- mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index b0c88e161a50d..6d6bd26251953 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -714,7 +714,7 @@ LogicalResult SortCooOp::verify() { auto checkDim = [&](Value v, uint64_t min, const char *message) { MemRefType tp = 
v.getType().cast(); int64_t dim = tp.getShape()[0]; - if (dim != ShapedType::kDynamicSize && dim < min) { + if (dim != ShapedType::kDynamicSize && dim < (int64_t)min) { emitError(llvm::formatv("{0} got {1} < {2}", message, dim, min)); } }; From 75ac294b35edd0efeb2f69005e4ccdff95604fdf Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Mon, 7 Nov 2022 17:10:01 +0000 Subject: [PATCH 453/516] [mlir][sparse] support parallel for/reduction in sparsification. This patch fixes the re-revert D135927 (which caused a Windows build failure) to re-enable parallel for/reduction. It also fixes a warning caused by D137442. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D137565 --- .../SparseTensor/Transforms/CodegenUtils.cpp | 146 +++++++++++++----- .../SparseTensor/Transforms/CodegenUtils.h | 32 +++- .../Transforms/Sparsification.cpp | 133 +++++++--------- .../Dialect/SparseTensor/sparse_parallel.mlir | 20 +-- .../SparseTensor/sparse_parallel_reduce.mlir | 63 ++++++++ .../SparseTensor/CPU/sparse_matmul.mlir | 8 + .../SparseTensor/CPU/sparse_matvec.mlir | 10 ++ 7 files changed, 285 insertions(+), 127 deletions(-) create mode 100644 mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 1e9cadd13e156..fc240b0b10c08 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -219,9 +219,12 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( OpBuilder &builder, Location loc, size_t tid, size_t dim, MutableArrayRef reduc, bool isParallel, ArrayRef extraTids, ArrayRef extraDims) { + assert(dimTypes[tid].size() > dim); // We can not re-enter the same level. assert(!coord[tid][dim]); + // TODO: support multiple return on parallel for? 
+ assert(!isParallel || reduc.size() <= 1); Value step = constantIndex(builder, loc, 1); auto dimType = dimTypes[tid][dim]; @@ -232,11 +235,38 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( Value lo = isSparseInput ? pidxs[tid][dim] // current offset : loopSeqStack.back(); // univeral tid Value hi = highs[tid][dim]; + Operation *loop = nullptr; + Value iv; + if (isParallel) { + scf::ParallelOp parOp = + builder.create(loc, lo, hi, step, reduc); + builder.setInsertionPointToStart(parOp.getBody()); + assert(parOp.getNumReductions() == reduc.size()); + iv = parOp.getInductionVars()[0]; + + // In-place update on the reduction variable vector. + // Note that the init vals is not the actual reduction variables but instead + // used as a `special handle` to (temporarily) represent them. The + // expression on init vals will be moved into scf.reduce and replaced with + // the block arguments when exiting the loop (see exitForLoop). This is + // needed as we can not build the actual reduction block and get the actual + // reduction varaible before users fill parallel loop body. + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = parOp.getInitVals()[i]; + loop = parOp; + } else { + scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); + builder.setInsertionPointToStart(forOp.getBody()); + iv = forOp.getInductionVar(); + + // In-place update on the reduction variable vector. + assert(forOp.getNumRegionIterArgs() == reduc.size()); + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = forOp.getRegionIterArg(i); + loop = forOp; + } + assert(loop && iv); - scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); - builder.setInsertionPointToStart(forOp.getBody()); - Value iv = forOp.getInductionVar(); - assert(iv); if (isSparseInput) { pidxs[tid][dim] = iv; // Generating a load on the indices array yields the coordinate. 
@@ -253,16 +283,12 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( // NOTE: we can also prepares for next dim here in advance // Push the loop into stack - loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), forOp, + loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), loop, coord[tid][dim]); // Emit extra locals. emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims); - // In-place update on the reduction variable vector. - assert(forOp.getNumRegionIterArgs() == reduc.size()); - for (int i = 0, e = reduc.size(); i < e; i++) - reduc[i] = forOp.getRegionIterArg(i); - return forOp; + return loop; } Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims( @@ -434,17 +460,73 @@ void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims( } } -SmallVector -SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc) { LoopLevelInfo &loopInfo = loopStack.back(); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; - auto forOp = llvm::cast(loopInfo.loop); - if (!reduc.empty()) { - assert(reduc.size() == forOp.getNumResults()); - builder.setInsertionPointToEnd(forOp.getBody()); - builder.create(loc, reduc); + auto forOp = llvm::dyn_cast(loopInfo.loop); + if (forOp) { + if (!reduc.empty()) { + assert(reduc.size() == forOp.getNumResults()); + rewriter.setInsertionPointToEnd(forOp.getBody()); + rewriter.create(loc, reduc); + } + // Exit the loop. + rewriter.setInsertionPointAfter(forOp); + // In-place update reduction variables. 
+ for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++) + reduc[i] = forOp.getResult(i); + } else { + auto parOp = llvm::cast(loopInfo.loop); + if (!reduc.empty()) { + assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1); + Operation *redExp = reduc.front().getDefiningOp(); + // Reduction expression should have no use. + assert(redExp->getUses().empty()); + // This must be a binary operation. + // NOTE: This is users' responsibilty to ensure the operation are + // commutative. + assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1); + + Value redVal = parOp.getInitVals().front(); + Value curVal; + if (redExp->getOperand(0) == redVal) + curVal = redExp->getOperand(1); + else if (redExp->getOperand(1) == redVal) + curVal = redExp->getOperand(0); + // One of the operands must be the init value (which is also the + // previous reduction value). + assert(curVal); + // The reduction expression should be the only user of the reduction val + // inside the parallel for. + unsigned numUsers = 0; + for (Operation *op : redVal.getUsers()) { + if (op->getParentOp() == parOp) + numUsers++; + } + assert(numUsers == 1); + (void)numUsers; // to silence unused variable warning in release build + + rewriter.setInsertionPointAfter(redExp); + auto redOp = rewriter.create(loc, curVal); + // Attach to the reduction op. + Block *redBlock = &redOp.getRegion().getBlocks().front(); + rewriter.setInsertionPointToEnd(redBlock); + Operation *newRed = rewriter.clone(*redExp); + // Replaces arguments of the reduction expression by using the block + // arguments from scf.reduce. + rewriter.updateRootInPlace( + newRed, [&]() { newRed->setOperands(redBlock->getArguments()); }); + // Erases the out-dated reduction expression. + rewriter.eraseOp(redExp); + rewriter.setInsertionPointToEnd(redBlock); + rewriter.create(loc, newRed->getResult(0)); + } + rewriter.setInsertionPointAfter(parOp); + // In-place update reduction variables. 
+ for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++) + reduc[i] = parOp.getResult(i); } // Finished iterating a tensor, clean up @@ -458,14 +540,10 @@ SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc, if (!isDenseDLT(dimTypes[tid][dim])) highs[tid][dim] = Value(); } - // exit the loop - builder.setInsertionPointAfter(forOp); - return forOp.getResults(); } -SmallVector -SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitCoIterationLoop( + OpBuilder &builder, Location loc, MutableArrayRef reduc) { auto whileOp = llvm::cast(loopStack.back().loop); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; @@ -499,10 +577,10 @@ SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, } // Reduction value from users. - SmallVector ret; - for (auto red : reduc) { - operands.push_back(red); - ret.push_back(whileOp->getResult(o++)); + for (unsigned i = 0, e = reduc.size(); i < e; i++) { + operands.push_back(reduc[i]); + // In place update reduction variable. + reduc[i] = whileOp->getResult(o++); } // An (optional) universal index. @@ -517,26 +595,24 @@ SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, assert(o == operands.size()); builder.create(loc, operands); builder.setInsertionPointAfter(whileOp); - return ret; } -SmallVector -SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitCurrentLoop(RewriterBase &rewriter, + Location loc, + MutableArrayRef reduc) { // Clean up the values, it would help use to discover potential bug at a // earlier stage (instead of silently using a wrong value). 
LoopLevelInfo &loopInfo = loopStack.back(); assert(loopInfo.tids.size() == loopInfo.dims.size()); SmallVector red; if (llvm::isa(loopInfo.loop)) { - red = exitCoiterationLoop(builder, loc, reduc); + exitCoIterationLoop(rewriter, loc, reduc); } else { - red = exitForLoop(builder, loc, reduc); + exitForLoop(rewriter, loc, reduc); } assert(loopStack.size() == loopSeqStack.size()); loopStack.pop_back(); - return red; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 3228eb4c79cb2..a75d3920a4d55 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -380,8 +380,8 @@ class SparseTensorLoopEmitter { ArrayRef dims, bool needsUniv, MutableArrayRef reduc = {}, ArrayRef extraTids = {}, ArrayRef extraDims = {}); - SmallVector exitCurrentLoop(OpBuilder &builder, Location loc, - ArrayRef reduc = {}); + void exitCurrentLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc = {}); /// Returns the array of coordinate for all the loop generated till now. void getCoordinateArray(SmallVectorImpl &coords) const { @@ -452,17 +452,35 @@ class SparseTensorLoopEmitter { ArrayRef dims); /// Exits a for loop, returns the reduction results, e.g., + /// For sequential for loops: /// %ret = for () { /// ... + /// %val = addi %args, %c /// yield %val /// } - /// Return %ret to user, while %val is provided by users (`reduc`) - SmallVector exitForLoop(OpBuilder &builder, Location loc, - ArrayRef reduc); + /// For parallel loops, the following generated code by users: + /// %ret = parallel () init(%args) { + /// ... + /// %val = op %args, %c + /// } + /// will be transformed into + /// %ret = parallel () init(%args) { + /// ... 
+ /// scf.reduce(%c) bb0(%0, %1){ + /// %val = op %0, %1 + /// scf.reduce.return %val + /// } + /// } + /// NOTE: only one instruction will be moved into reduce block, transformation + /// will fail if multiple instructions are used to compute the reduction + /// value. + /// Return %ret to user, while %val is provided by users (`reduc`). + void exitForLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc); /// Exits a while loop, returns the reduction results. - SmallVector exitCoiterationLoop(OpBuilder &builder, Location loc, - ArrayRef reduc); + void exitCoIterationLoop(OpBuilder &builder, Location loc, + MutableArrayRef reduc); // Whether the loop emitter needs to treat the last tensor as the output // tensor. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 9f01731a34d4c..533d31fdb5536 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -410,6 +410,34 @@ static Value getCustomRedId(Operation *op) { // Sparse compiler synthesis methods (statements and expressions). //===----------------------------------------------------------------------===// +/// Generates loop boundary statements (entering/exiting loops). The function +/// passes and updates the reduction value. +static Optional genLoopBoundary( + CodeGen &codegen, Merger &merger, + function_ref(MutableArrayRef reduc)> + callback) { + SmallVector reduc; + if (codegen.redVal) + reduc.push_back(codegen.redVal); + if (codegen.expValues) + reduc.push_back(codegen.expCount); + if (codegen.insChain) + reduc.push_back(codegen.insChain); + + auto r = callback(reduc); + + // Callback should do in-place update on reduction value vector. 
+ unsigned i = 0; + if (codegen.redVal) + updateReduc(merger, codegen, reduc[i++]); + if (codegen.expValues) + codegen.expCount = reduc[i++]; + if (codegen.insChain) + codegen.insChain = reduc[i]; + + return r; +} + /// Local bufferization of all dense and sparse data structures. static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder, linalg::GenericOp op) { @@ -869,23 +897,25 @@ static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder, /// Returns parallelization strategy. Any implicit loop in the Linalg /// operation that is marked "parallel" is a candidate. Whether it is actually /// converted to a parallel operation depends on the requested strategy. -static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction, - bool isSparse) { +static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isSparse) { // Reject parallelization of sparse output. if (codegen.sparseOut) return false; + // Parallel loops on tensor expansion can cause data races. + if (codegen.expCount) + return false; // Inspect strategy. 
switch (codegen.options.parallelizationStrategy) { case SparseParallelizationStrategy::kNone: return false; case SparseParallelizationStrategy::kDenseOuterLoop: - return isOuter && !isSparse && !isReduction; + return isOuter && !isSparse; case SparseParallelizationStrategy::kAnyStorageOuterLoop: - return isOuter && !isReduction; + return isOuter; case SparseParallelizationStrategy::kDenseAnyLoop: - return !isSparse && !isReduction; + return !isSparse; case SparseParallelizationStrategy::kAnyStorageAnyLoop: - return !isReduction; + return true; } llvm_unreachable("unexpected parallelization strategy"); } @@ -898,33 +928,16 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef extraDims) { Location loc = op.getLoc(); auto iteratorTypes = op.getIteratorTypesArray(); - bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]); bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) || isSingletonDLT(merger.getDimLevelType(tid, idx)); - bool isParallel = isParallelFor(codegen, isOuter, isReduction, isSparse); - assert(!isParallel); - - // Emit a sequential for loop. 
- SmallVector operands; - if (codegen.redVal) - operands.push_back(codegen.redVal); - if (codegen.expValues) - operands.push_back(codegen.expCount); - if (codegen.insChain) - operands.push_back(codegen.insChain); - - Operation *loop = codegen.loopEmitter.enterLoopOverTensorAtDim( - builder, loc, tid, dim, operands, isParallel, extraTids, extraDims); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, operands[o++]); - if (codegen.expValues) - codegen.expCount = operands[o++]; - if (codegen.insChain) - codegen.insChain = operands[o++]; - assert(o == operands.size()); - + bool isParallel = isParallelFor(codegen, isOuter, isSparse); + + Operation *loop = + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + return codegen.loopEmitter.enterLoopOverTensorAtDim( + builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims); + }).value(); + assert(loop); return loop; } @@ -934,29 +947,15 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef condTids, ArrayRef condDims, ArrayRef extraTids, ArrayRef extraDims) { - SmallVector operands; - - // Construct the while-loop with a parameter for each index. - if (codegen.redVal) - operands.push_back(codegen.redVal); - if (codegen.expValues) - operands.push_back(codegen.expCount); - if (codegen.insChain) - operands.push_back(codegen.insChain); - - Operation *loop = codegen.loopEmitter.enterCoIterationOverTensorsAtDims( - builder, op.getLoc(), condTids, condDims, needsUniv, operands, extraTids, - extraDims); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, operands[o++]); - if (codegen.expValues) - codegen.expCount = operands[o++]; - if (codegen.insChain) - codegen.insChain = operands[o++]; - assert(o == operands.size()); + Operation *loop = + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + // Construct the while-loop with a parameter for each index. 
+ return codegen.loopEmitter.enterCoIterationOverTensorsAtDims( + builder, op.getLoc(), condTids, condDims, needsUniv, reduc, + extraTids, extraDims); + }).value(); + assert(loop); return loop; } @@ -1186,37 +1185,21 @@ static Operation *startLoop(Merger &merger, CodeGen &codegen, } /// Ends a single loop in current sequence. Returns new values for needsUniv. -static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder, +static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter, linalg::GenericOp op, Operation *loop, unsigned idx, unsigned li, bool needsUniv) { // End a while-loop. if (auto whileOp = dyn_cast(loop)) { - finalizeWhileOp(merger, codegen, builder, op, idx, needsUniv, + finalizeWhileOp(merger, codegen, rewriter, op, idx, needsUniv, merger.lat(li).bits, whileOp); } else { needsUniv = false; } - SmallVector reduc; - if (codegen.redVal) - reduc.push_back(codegen.redVal); - if (codegen.expValues) - reduc.push_back(codegen.expCount); - if (codegen.insChain) - reduc.push_back(codegen.insChain); - - auto loopRet = - codegen.loopEmitter.exitCurrentLoop(builder, op.getLoc(), reduc); - assert(reduc.size() == loopRet.size()); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, loopRet[o++]); - if (codegen.expValues) - codegen.expCount = loopRet[o++]; - if (codegen.insChain) - codegen.insChain = loopRet[o++]; - assert(o == loopRet.size()); + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + codegen.loopEmitter.exitCurrentLoop(rewriter, op.getLoc(), reduc); + return llvm::None; + }); return needsUniv; } diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir index 38766b08ccab8..f38865c5e2a4f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -1,14 +1,13 @@ // RUN: mlir-opt %s -sparsification="parallelization-strategy=none" | \ // RUN: FileCheck %s 
--check-prefix=CHECK-PAR0 -// FIXME: we do not support vectorization/parallel loops in loop emitter right now -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR1 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR2 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR3 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR4 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR1 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR2 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR3 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR4 #DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] @@ -151,7 +150,8 @@ func.func @scale_ss(%scale: f32, // // CHECK-PAR4-LABEL: func @matvec // CHECK-PAR4: scf.parallel -// CHECK-PAR4: scf.for +// CHECK-PAR4: scf.parallel +// CHECK-PAR4: scf.reduce // CHECK-PAR4: return // func.func @matvec(%arga: tensor<16x32xf32, #CSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir new file mode 100644 index 0000000000000..8ba66d2c92ae1 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -0,0 +1,63 @@ +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ +// RUN: FileCheck %s + +#CSR = #sparse_tensor.encoding<{ + 
dimLevelType = [ "dense", "compressed" ] +}> + +#trait_matvec = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A + affine_map<(i,j) -> (j)>, // b + affine_map<(i,j) -> (i)> // x (out) + ], + iterator_types = ["parallel", "reduction"], + doc = "x(i) += A(i,j) * b(j)" +} +// CHECK-LABEL: func.func @matvec( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>, +// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>, +// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> { +// CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK: %[[TMP_0:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index} +// CHECK: %[[TMP_1:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index} +// CHECK: %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]] +// CHECK: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : memref<32xf32> +// CHECK: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : memref<16xf32> +// CHECK: scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) { +// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> +// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref +// CHECK: %[[TMP_8:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index +// CHECK: %[[TMP_9:.*]] = memref.load %[[TMP_0]][%[[TMP_8]]] : memref +// CHECK: %[[TMP_10:.*]] = scf.parallel (%[[TMP_arg4:.*]]) = (%[[TMP_7]]) to (%[[TMP_9]]) step (%[[TMP_c1]]) init (%[[TMP_6]]) -> f32 { +// CHECK: %[[TMP_11:.*]] = memref.load %[[TMP_1]][%[[TMP_arg4]]] : memref +// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_2]][%[[TMP_arg4]]] : memref +// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_3]][%[[TMP_11]]] : memref<32xf32> +// CHECK: %[[TMP_14:.*]] = arith.mulf %[[TMP_12]], %[[TMP_13]] : f32 +// CHECK: 
scf.reduce(%[[TMP_14]]) : f32 { +// CHECK: ^bb0(%[[TMP_arg5:.*]]: f32, %[[TMP_arg6:.*]]: f32): +// CHECK: %[[TMP_15:.*]] = arith.addf %[[TMP_arg5]], %[[TMP_arg6]] : f32 +// CHECK: scf.reduce.return %[[TMP_15]] : f32 +// CHECK: } +// CHECK: scf.yield +// CHECK: } +// CHECK: memref.store %[[TMP_10]], %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> +// CHECK: scf.yield +// CHECK: } +// CHECK: %[[TMP_5:.*]] = bufferization.to_tensor %[[TMP_4]] : memref<16xf32> +// CHECK: return %[[TMP_5]] : tensor<16xf32> +func.func @matvec(%arga: tensor<16x32xf32, #CSR>, + %argb: tensor<32xf32>, + %argx: tensor<16xf32>) -> tensor<16xf32> { + %0 = linalg.generic #trait_matvec + ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) + outs(%argx: tensor<16xf32>) { + ^bb(%A: f32, %b: f32, %x: f32): + %0 = arith.mulf %A, %b : f32 + %1 = arith.addf %0, %x : f32 + linalg.yield %1 : f32 + } -> tensor<16xf32> + return %0 : tensor<16xf32> +} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir index c12d2b9b913e4..459b0e13667f6 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir @@ -2,6 +2,14 @@ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// +// Do the same run, but now with parallelization. 
+// +// RUN: mlir-opt %s --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir index 59e7f33c22c88..adc0b261f04d3 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -4,6 +4,16 @@ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// +// Do the same run, but now with parallelization. +// +// RUN: mlir-opt %s \ +// RUN: --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ +// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s !Filename = !llvm.ptr From 89ddcff1d2d6e9f4de78f3a563a8b1987bf7ea8f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Nov 2022 10:07:10 -0800 Subject: [PATCH 454/516] [LTO] Make local linkage GlobalValue in non-prevailing COMDAT available_externally For a local linkage GlobalObject in a non-prevailing COMDAT, it remains defined while its leader has been made available_externally. This violates the COMDAT rule that its members must be retained or discarded as a unit. To fix this, update the regular LTO change D34803 to track local linkage GlobalValues, and port the code to ThinLTO (GlobalAliases are not handled.) This fixes two problems. 
(a) `__cxx_global_var_init` in a non-prevailing COMDAT group used to linger around (unreferenced, hence benign), and is now correctly discarded. ``` int foo(); inline int v = foo(); ``` (b) Fix https://github.com/llvm/llvm-project/issues/58215: as a size optimization, we place private `__profd_` in a COMDAT with a `__profc_` key. When FuncImport.cpp makes `__profc_` available_externally due to a non-prevailing COMDAT, `__profd_` incorrectly remains private. This change makes the `__profd_` available_externally. ``` cat > c.h <<'eof' extern void bar(); inline __attribute__((noinline)) void foo() {} eof cat > m1.cc <<'eof' #include "c.h" int main() { bar(); foo(); } eof cat > m2.cc <<'eof' #include "c.h" __attribute__((noinline)) void bar() { foo(); } eof clang -O2 -fprofile-generate=./t m1.cc m2.cc -flto -fuse-ld=lld -o t_gen rm -fr t && ./t_gen && llvm-profdata show -function=foo t/default_*.profraw clang -O2 -fprofile-generate=./t m1.cc m2.cc -flto=thin -fuse-ld=lld -o t_gen rm -fr t && ./t_gen && llvm-profdata show -function=foo t/default_*.profraw ``` If a GlobalAlias references a GlobalValue that has just been changed to available_externally, change the GlobalAlias as well (e.g. C5/D5 comdats due to cc1 -mconstructor-aliases). The GlobalAlias may be referenced by other available_externally functions, so it cannot easily be removed. Depends on D137441: we use available_externally to mark a GlobalAlias in a non-prevailing COMDAT, similar to how we handle GlobalVariable/Function. A GlobalAlias may refer to a ConstantExpr; not changing the GlobalAlias to a GlobalVariable gives flexibility for future extensions (the use case is niche, so for simplicity we don't handle it yet). In addition, available_externally GlobalAlias is the most straightforward implementation and retains the aliasee information to help optimizers.
Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D135427 --- llvm/lib/LTO/LTO.cpp | 10 ++--- llvm/lib/Transforms/IPO/FunctionImport.cpp | 35 ++++++++++++++- .../LTO/Resolution/X86/comdat-mixed-lto.ll | 19 ++++++-- .../X86/Inputs/linkonce_resolution_comdat.ll | 17 +++++-- llvm/test/ThinLTO/X86/constructor-alias.ll | 44 +++++++++++++++++++ .../ThinLTO/X86/linkonce_resolution_comdat.ll | 41 ++++++++++++----- 6 files changed, 144 insertions(+), 22 deletions(-) create mode 100644 llvm/test/ThinLTO/X86/constructor-alias.ll diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 286d3ca3e2cc0..dc28b681a1515 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -712,11 +712,11 @@ handleNonPrevailingComdat(GlobalValue &GV, if (!NonPrevailingComdats.count(C)) return; - // Additionally need to drop externally visible global values from the comdat - // to available_externally, so that there aren't multiply defined linker - // errors. - if (!GV.hasLocalLinkage()) - GV.setLinkage(GlobalValue::AvailableExternallyLinkage); + // Additionally need to drop all global values from the comdat to + // available_externally, to satisfy the COMDAT requirement that all members + // are discarded as a unit. The non-local linkage global values avoid + // duplicate definition linker errors. 
+ GV.setLinkage(GlobalValue::AvailableExternallyLinkage); if (auto GO = dyn_cast(&GV)) GO->setComdat(nullptr); diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index b589ec798caa1..844ee19e1e6a5 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1051,6 +1051,7 @@ bool llvm::convertToDeclaration(GlobalValue &GV) { void llvm::thinLTOFinalizeInModule(Module &TheModule, const GVSummaryMapTy &DefinedGlobals, bool PropagateAttrs) { + DenseSet NonPrevailingComdats; auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) { // See if the global summary analysis computed a new resolved linkage. const auto &GS = DefinedGlobals.find(GV.getGUID()); @@ -1128,8 +1129,10 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, // as this is a declaration for the linker, and will be dropped eventually. // It is illegal for comdats to contain declarations. auto *GO = dyn_cast_or_null(&GV); - if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) + if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { + NonPrevailingComdats.insert(GO->getComdat()); GO->setComdat(nullptr); + } }; // Process functions and global now @@ -1139,6 +1142,36 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, FinalizeInModule(GV); for (auto &GV : TheModule.aliases()) FinalizeInModule(GV); + + // For a non-prevailing comdat, all its members must be available_externally. + // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle + // local linkage GlobalValues. 
+ if (NonPrevailingComdats.empty()) + return; + for (auto &GO : TheModule.global_objects()) { + if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) { + GO.setComdat(nullptr); + GO.setLinkage(GlobalValue::AvailableExternallyLinkage); + } + } + bool Changed; + do { + Changed = false; + // If an alias references a GlobalValue in a non-prevailing comdat, change + // it to available_externally. For simplicity we don't handle ConstantExpr + // aliasee, which is unlikely used in a COMDAT. + for (auto &GA : TheModule.aliases()) { + if (GA.hasAvailableExternallyLinkage()) + continue; + assert(isa(GA.getAliasee()) && + "non-GlobalValue aliasee is unimplemented"); + if (const auto *GV = dyn_cast(GA.getAliasee())) + if (GV->hasAvailableExternallyLinkage()) { + GA.setLinkage(GlobalValue::AvailableExternallyLinkage); + Changed = true; + } + } + } while (Changed); } /// Run internalization on \p TheModule based on symmary analysis. diff --git a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll index d3730f4e9bcda..96d8f3157b996 100644 --- a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll +++ b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll @@ -8,7 +8,7 @@ ; The copy of C from this module is prevailing. The copy of C from the ; regular LTO module is not prevailing, and will be dropped to ; available_externally. -; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t2.o,testglobfunc,lxp -r=%t1.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps +; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lxp -r=%t2.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps ; The Input module (regular LTO) is %t3.0. Check to make sure that we removed ; __cxx_global_var_init and testglobfunc from comdat. Also check to ensure @@ -16,8 +16,21 @@ ; have linker multiply defined errors as it is no longer in a comdat and ; would clash with the copy from this module. 
; RUN: llvm-dis %t3.0.0.preopt.bc -o - | FileCheck %s -; CHECK: define internal void @__cxx_global_var_init() section ".text.startup" { -; CHECK: define available_externally dso_local void @testglobfunc() section ".text.startup" { + +; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @C }] +; CHECK: @C = available_externally dso_local global %"class.Test::ptr" zeroinitializer, align 4 +; CHECK-NOT: declare +; CHECK: declare dso_local void @__cxx_global_var_init() section ".text.startup" +; CHECK-NOT: declare + +; Check the behavior with the prevailing testglobfunc in %t2.o. +; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lx -r=%t2.o,testglobfunc,plx -o %t4 %t1.o %t2.o -save-temps +; RUN: llvm-dis %t4.0.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK2 + +; CHECK2: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @C }] +; CHECK2: @C = available_externally dso_local global %"class.Test::ptr" zeroinitializer, align 4 +; CHECK2: declare dso_local void @__cxx_global_var_init() section ".text.startup" +; CHECK2: define available_externally dso_local void @testglobfunc() section ".text.startup" { ; ModuleID = 'comdat-mixed-lto.o' source_filename = "comdat-mixed-lto.cpp" diff --git a/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll b/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll index 92b5182315943..f5b3130fd1520 100644 --- a/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll +++ b/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll @@ -1,13 +1,24 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -$c2 = comdat any +$f = comdat any +$g = comdat any -define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c2) { +@g_private = private global i32 41, comdat($g) + +define 
linkonce_odr i32 @f(i8*) unnamed_addr comdat($f) { + ret i32 41 +} + +define linkonce_odr i32 @g() unnamed_addr comdat($g) { ret i32 41 } -define i32 @g() { +define internal void @g_internal() unnamed_addr comdat($g) { + ret void +} + +define i32 @h() { %i = call i32 @f(i8* null) ret i32 %i } diff --git a/llvm/test/ThinLTO/X86/constructor-alias.ll b/llvm/test/ThinLTO/X86/constructor-alias.ll new file mode 100644 index 0000000000000..212ff7c425725 --- /dev/null +++ b/llvm/test/ThinLTO/X86/constructor-alias.ll @@ -0,0 +1,44 @@ +;; The constructor alias example is reduced from +;; +;; template +;; struct A { A() {} virtual ~A() {} }; +;; template struct A; +;; void *foo() { return new A; } +;; +;; clang -c -fpic -O1 -flto=thin a.cc && cp a.o b.o && ld.lld -shared a.o b.so + +; RUN: opt -module-summary %s -o %t1.bc +; RUN: cp %t1.bc %t2.bc +; RUN: llvm-lto2 run %t1.bc %t2.bc -r=%t1.bc,_ZTV1A,pl -r=%t1.bc,_ZN1AD0Ev,pl -r=%t1.bc,_ZN1AD1Ev,pl -r=%t1.bc,_ZN1AD2Ev,pl -r=%t1.bc,D1_a,pl -r=%t1.bc,D1_a_a,pl \ +; RUN: -r=%t2.bc,_ZTV1A,l -r=%t2.bc,_ZN1AD0Ev,l -r=%t2.bc,_ZN1AD1Ev,l -r=%t2.bc,_ZN1AD2Ev,l -r=%t2.bc,D1_a,l -r=%t2.bc,D1_a_a,l -o %t3 --save-temps +; RUN: llvm-dis < %t3.2.1.promote.bc | FileCheck %s + +; CHECK: @_ZTV1A = available_externally dso_local unnamed_addr constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN1AD1Ev, ptr @_ZN1AD0Ev] } +; CHECK: @D1_a = available_externally dso_local unnamed_addr alias void (ptr), ptr @_ZN1AD1Ev +; CHECK: @_ZN1AD1Ev = available_externally dso_local unnamed_addr alias void (ptr), ptr @_ZN1AD2Ev +; CHECK: @D1_a_a = available_externally dso_local unnamed_addr alias void (ptr), ptr @D1_a +; CHECK: define available_externally dso_local void @_ZN1AD2Ev(ptr noundef nonnull %0) unnamed_addr { +; CHECK: define available_externally dso_local void @_ZN1AD0Ev(ptr noundef nonnull %0) unnamed_addr { + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = 
"x86_64-unknown-linux-gnu" + +$_ZN1AD5Ev = comdat any +$_ZTV1A = comdat any + +@_ZTV1A = weak_odr unnamed_addr constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN1AD1Ev, ptr @_ZN1AD0Ev] }, comdat + +@D1_a = weak_odr unnamed_addr alias void (ptr), ptr @_ZN1AD1Ev +@_ZN1AD1Ev = weak_odr unnamed_addr alias void (ptr), ptr @_ZN1AD2Ev +@D1_a_a = weak_odr unnamed_addr alias void (ptr), ptr @D1_a + +define weak_odr void @_ZN1AD2Ev(ptr noundef nonnull %0) unnamed_addr comdat($_ZN1AD5Ev) { + ret void +} + +define weak_odr void @_ZN1AD0Ev(ptr noundef nonnull %0) unnamed_addr comdat($_ZN1AD5Ev) { + call void @D1_a(ptr noundef nonnull %0) + call void @D1_a_a(ptr noundef nonnull %0) + call void @_ZN1AD1Ev(ptr noundef nonnull %0) + ret void +} diff --git a/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll b/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll index 7b22180132e6a..2fb226046ea9f 100644 --- a/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll +++ b/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll @@ -1,33 +1,54 @@ -; This test ensures that we drop the preempted copy of @f from %t2.bc from its -; comdat after making it available_externally. If not we would get a -; verification error. +; This test ensures that we drop the preempted copy of @f/@g from %t2.bc from their +; comdats after making it available_externally. If not we would get a +; verification error. g_internal/g_private are changed to available_externally +; as well since it is in the same comdat of g. ; RUN: opt -module-summary %s -o %t1.bc ; RUN: opt -module-summary %p/Inputs/linkonce_resolution_comdat.ll -o %t2.bc -; RUN: llvm-lto -thinlto-action=run -disable-thinlto-funcattrs=0 %t1.bc %t2.bc -exported-symbol=f -exported-symbol=g -thinlto-save-temps=%t3. +; RUN: llvm-lto -thinlto-action=run -disable-thinlto-funcattrs=0 %t1.bc %t2.bc -exported-symbol=f -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. 
; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT1 ; RUN: llvm-dis %t3.1.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT2 ; Copy from first module is prevailing and converted to weak_odr, copy ; from second module is preempted and converted to available_externally and ; removed from comdat. -; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] comdat($c1) { +; IMPORT1: @g_private = private global i32 43, comdat($g) +; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] comdat { +; IMPORT1: define weak_odr i32 @g() unnamed_addr [[ATTR]] comdat { +; IMPORT1: define internal void @g_internal() unnamed_addr comdat($g) { + +; IMPORT2: @g_private = available_externally dso_local global i32 41{{$}} ; IMPORT2: define available_externally i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] { +; IMPORT2: define available_externally i32 @g() unnamed_addr [[ATTR]] { +; IMPORT2: define available_externally dso_local void @g_internal() unnamed_addr { ; CHECK-DAG: attributes [[ATTR]] = { norecurse nounwind } -; RUN: llvm-nm -o - < %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 +; RUN: llvm-nm %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 ; NM1: W f +; NM1: W g -; RUN: llvm-nm -o - < %t2.bc.thinlto.o | FileCheck %s --check-prefix=NM2 +; RUN: llvm-nm %t2.bc.thinlto.o | FileCheck %s --check-prefix=NM2 ; f() would have been turned into available_externally since it is preempted, -; and inlined into g() +; and inlined into h() ; NM2-NOT: f +; NM2-NOT: g target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -$c1 = comdat any +$f = comdat any +$g = comdat any + +@g_private = private global i32 43, comdat($g) -define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c1) { +define linkonce_odr i32 @f(i8*) unnamed_addr comdat { ret i32 43 } + +define linkonce_odr i32 @g() unnamed_addr comdat { + ret i32 43 +} + +define internal void 
@g_internal() unnamed_addr comdat($g) { + ret void +} From 108e41d962463ea1cb956d1a929692a5b49c0a80 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Mon, 7 Nov 2022 18:25:46 +0000 Subject: [PATCH 455/516] [clang][NFC] Use c++17 style variable type traits This was done as a test for D137302 and it makes sense to push these changes Reviewed By: shafik Differential Revision: https://reviews.llvm.org/D137491 --- clang/lib/AST/ASTContext.cpp | 6 ++---- clang/lib/AST/ASTImporter.cpp | 4 ++-- clang/lib/AST/Comment.cpp | 2 +- clang/lib/AST/Decl.cpp | 3 +-- clang/lib/AST/Interp/Disasm.cpp | 2 +- clang/lib/Analysis/CFG.cpp | 6 +++--- clang/lib/Analysis/RetainSummaryManager.cpp | 2 +- clang/lib/Basic/SourceLocation.cpp | 4 ++-- clang/lib/Frontend/CompilerInvocation.cpp | 3 +-- clang/lib/Lex/MacroArgs.cpp | 4 ++-- clang/lib/Sema/SemaDeclAttr.cpp | 2 +- clang/lib/Sema/SemaTemplateDeduction.cpp | 10 +++++----- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 4 ++-- clang/lib/Tooling/ASTDiff/ASTDiff.cpp | 6 ++---- clang/unittests/Lex/HeaderMapTest.cpp | 6 ++---- clang/unittests/Tooling/ASTSelectionTest.cpp | 20 ++++++++++---------- clang/unittests/Tooling/Syntax/TreeTest.cpp | 5 ++--- 17 files changed, 40 insertions(+), 49 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index f07c40cb6c5d9..52b361328ebc6 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12280,16 +12280,14 @@ static Decl *getCommonDecl(Decl *X, Decl *Y) { llvm_unreachable("Corrupt redecls chain"); } -template ::value, bool> = true> +template , bool> = true> T *getCommonDecl(T *X, T *Y) { return cast_or_null( getCommonDecl(const_cast(cast_or_null(X)), const_cast(cast_or_null(Y)))); } -template ::value, bool> = true> +template , bool> = true> T *getCommonDeclChecked(T *X, T *Y) { return cast(getCommonDecl(const_cast(cast(X)), const_cast(cast(Y)))); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 
631dfaebabbd6..88262268fc97c 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -200,8 +200,8 @@ namespace clang { // cast the return value to `T`. template auto import(T *From) - -> std::conditional_t::value, - Expected, Expected> { + -> std::conditional_t, Expected, + Expected> { auto ToOrErr = Importer.Import(From); if (!ToOrErr) return ToOrErr.takeError(); diff --git a/clang/lib/AST/Comment.cpp b/clang/lib/AST/Comment.cpp index eaa235bbe6103..4cf3bb39c4e81 100644 --- a/clang/lib/AST/Comment.cpp +++ b/clang/lib/AST/Comment.cpp @@ -29,7 +29,7 @@ namespace comments { #undef ABSTRACT_COMMENT // DeclInfo is also allocated with a BumpPtrAllocator. -static_assert(std::is_trivially_destructible::value, +static_assert(std::is_trivially_destructible_v, "DeclInfo should be trivially destructible!"); const char *Comment::getCommentKindName() const { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 04808643ab84a..1efe9c6d40dc0 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -188,8 +188,7 @@ static bool usesTypeVisibility(const NamedDecl *D) { /// Does the given declaration have member specialization information, /// and if so, is it an explicit specialization? 
template -static std::enable_if_t::value, - bool> +static std::enable_if_t, bool> isExplicitMemberSpecialization(const T *D) { if (const MemberSpecializationInfo *member = D->getMemberSpecializationInfo()) { diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp index 82debe4fcae16..d31e879d516fb 100644 --- a/clang/lib/AST/Interp/Disasm.cpp +++ b/clang/lib/AST/Interp/Disasm.cpp @@ -22,7 +22,7 @@ using namespace clang; using namespace clang::interp; template inline T ReadArg(Program &P, CodePtr &OpPC) { - if constexpr (std::is_pointer::value) { + if constexpr (std::is_pointer_v) { uint32_t ID = OpPC.read(); return reinterpret_cast(P.getNativePointer(ID)); } else { diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 20c6c68e44a07..458de974e46bf 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -727,9 +727,9 @@ class CFGBuilder { // hence strict duck-typing. template ::value || - std::is_base_of::value || - std::is_base_of::value>> + std::is_base_of_v || + std::is_base_of_v || + std::is_base_of_v>> void findConstructionContextsForArguments(CallLikeExpr *E) { for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { Expr *Arg = E->getArg(i); diff --git a/clang/lib/Analysis/RetainSummaryManager.cpp b/clang/lib/Analysis/RetainSummaryManager.cpp index 5e9c73534aeba..143c037dda9f6 100644 --- a/clang/lib/Analysis/RetainSummaryManager.cpp +++ b/clang/lib/Analysis/RetainSummaryManager.cpp @@ -32,7 +32,7 @@ constexpr static bool isOneOf() { /// rest of varargs. 
template constexpr static bool isOneOf() { - return std::is_same::value || isOneOf(); + return std::is_same_v || isOneOf(); } namespace { diff --git a/clang/lib/Basic/SourceLocation.cpp b/clang/lib/Basic/SourceLocation.cpp index 6e5e55fb09cef..f9ecd52e5f27b 100644 --- a/clang/lib/Basic/SourceLocation.cpp +++ b/clang/lib/Basic/SourceLocation.cpp @@ -42,11 +42,11 @@ void PrettyStackTraceLoc::print(raw_ostream &OS) const { // SourceLocation //===----------------------------------------------------------------------===// -static_assert(std::is_trivially_destructible::value, +static_assert(std::is_trivially_destructible_v, "SourceLocation must be trivially destructible because it is " "used in unions"); -static_assert(std::is_trivially_destructible::value, +static_assert(std::is_trivially_destructible_v, "SourceRange must be trivially destructible because it is " "used in unions"); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 6b8808078cd6c..b004c6c21be49 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -199,8 +199,7 @@ static void denormalizeSimpleFlag(SmallVectorImpl &Args, } template static constexpr bool is_uint64_t_convertible() { - return !std::is_same::value && - llvm::is_integral_or_enum::value; + return !std::is_same_v && llvm::is_integral_or_enum::value; } template ::value, + static_assert(std::is_trivial_v, "assume trivial copyability if copying into the " "uninitialized array (as opposed to reusing a cached " "MacroArgs)"); @@ -94,7 +94,7 @@ MacroArgs *MacroArgs::deallocate() { // Run the dtor to deallocate the vectors. this->~MacroArgs(); // Release the memory for the object. 
- static_assert(std::is_trivially_destructible::value, + static_assert(std::is_trivially_destructible_v, "assume trivially destructible and forego destructors"); free(this); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index da9aa611793f9..a747eb7bfe8cd 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -202,7 +202,7 @@ static unsigned getNumAttributeArgs(const ParsedAttr &AL) { /// A helper function to provide Attribute Location for the Attr types /// AND the ParsedAttr. template -static std::enable_if_t::value, SourceLocation> +static std::enable_if_t, SourceLocation> getAttrLoc(const AttrInfo &AL) { return AL.getLocation(); } diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 3db06a51e4eb7..6d57cd8542d6d 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -5491,12 +5491,12 @@ namespace { // specialized than primary" check. 
struct GetP2 { template ::value, bool> = true> + std::enable_if_t, bool> = true> T2 *operator()(T1 *, T2 *P2) { return P2; } template ::value, bool> = true> + std::enable_if_t, bool> = true> T1 *operator()(T1 *, T2 *) { return nullptr; } @@ -5508,7 +5508,7 @@ struct TemplateArgumentListAreEqual { TemplateArgumentListAreEqual(ASTContext &Ctx) : Ctx(Ctx) {} template ::value, bool> = true> + std::enable_if_t, bool> = true> bool operator()(T1 *PS1, T2 *PS2) { ArrayRef Args1 = PS1->getTemplateArgs().asArray(), Args2 = PS2->getTemplateArgs().asArray(); @@ -5527,7 +5527,7 @@ struct TemplateArgumentListAreEqual { } template ::value, bool> = true> + std::enable_if_t, bool> = true> bool operator()(T1 *Spec, T2 *Primary) { ArrayRef Args1 = Spec->getTemplateArgs().asArray(), Args2 = Primary->getInjectedTemplateArgs(); @@ -5576,7 +5576,7 @@ static TemplateLikeDecl * getMoreSpecialized(Sema &S, QualType T1, QualType T2, TemplateLikeDecl *P1, PrimaryDel *P2, TemplateDeductionInfo &Info) { constexpr bool IsMoreSpecialThanPrimaryCheck = - !std::is_same::value; + !std::is_same_v; bool Better1 = isAtLeastAsSpecializedAs(S, T1, T2, P2, Info); if (IsMoreSpecialThanPrimaryCheck && !Better1) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 1a2578b85f08f..a90e17fc496d2 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -912,9 +912,9 @@ static void printStateTraitWithLocationContextJson( // Try to do as much compile time checking as possible. // FIXME: check for invocable instead of function? 
- static_assert(std::is_function>::value, + static_assert(std::is_function_v>, "Printer is not a function!"); - static_assert(std::is_convertible::value, + static_assert(std::is_convertible_v, "Printer doesn't have the required type!"); if (LCtx && !State->get().isEmpty()) { diff --git a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp index 0821863adcc6e..6b359c1910bca 100644 --- a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp +++ b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp @@ -117,13 +117,11 @@ class SyntaxTree::Impl { Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST); template Impl(SyntaxTree *Parent, - std::enable_if_t::value, T> *Node, - ASTContext &AST) + std::enable_if_t, T> *Node, ASTContext &AST) : Impl(Parent, dyn_cast(Node), AST) {} template Impl(SyntaxTree *Parent, - std::enable_if_t::value, T> *Node, - ASTContext &AST) + std::enable_if_t, T> *Node, ASTContext &AST) : Impl(Parent, dyn_cast(Node), AST) {} SyntaxTree *Parent; diff --git a/clang/unittests/Lex/HeaderMapTest.cpp b/clang/unittests/Lex/HeaderMapTest.cpp index 4220edb2908e0..5484041844ea7 100644 --- a/clang/unittests/Lex/HeaderMapTest.cpp +++ b/clang/unittests/Lex/HeaderMapTest.cpp @@ -115,8 +115,7 @@ template struct PaddedFile { TEST(HeaderMapTest, lookupFilenameTruncatedSuffix) { typedef HMapFileMock<2, 64 - sizeof(HMapHeader) - 2 * sizeof(HMapBucket)> FileTy; - static_assert(std::is_standard_layout::value, - "Expected standard layout"); + static_assert(std::is_standard_layout_v, "Expected standard layout"); static_assert(sizeof(FileTy) == 64, "check the math"); PaddedFile P; auto &File = P.File; @@ -151,8 +150,7 @@ TEST(HeaderMapTest, lookupFilenameTruncatedSuffix) { TEST(HeaderMapTest, lookupFilenameTruncatedPrefix) { typedef HMapFileMock<2, 64 - sizeof(HMapHeader) - 2 * sizeof(HMapBucket)> FileTy; - static_assert(std::is_standard_layout::value, - "Expected standard layout"); + static_assert(std::is_standard_layout_v, "Expected standard layout"); 
static_assert(sizeof(FileTy) == 64, "check the math"); PaddedFile P; auto &File = P.File; diff --git a/clang/unittests/Tooling/ASTSelectionTest.cpp b/clang/unittests/Tooling/ASTSelectionTest.cpp index 88988ef447875..531f9ac89f441 100644 --- a/clang/unittests/Tooling/ASTSelectionTest.cpp +++ b/clang/unittests/Tooling/ASTSelectionTest.cpp @@ -101,22 +101,22 @@ void checkDeclName(const SelectedASTNode &Node, StringRef Name) { } template -const SelectedASTNode &checkNode( - const SelectedASTNode &StmtNode, SourceSelectionKind SelectionKind, - unsigned NumChildren = 0, - std::enable_if_t::value, T> *StmtOverloadChecker = - nullptr) { +const SelectedASTNode & +checkNode(const SelectedASTNode &StmtNode, SourceSelectionKind SelectionKind, + unsigned NumChildren = 0, + std::enable_if_t, T> *StmtOverloadChecker = + nullptr) { checkNodeImpl(isa(StmtNode.Node.get()), StmtNode, SelectionKind, NumChildren); return StmtNode; } template -const SelectedASTNode &checkNode( - const SelectedASTNode &DeclNode, SourceSelectionKind SelectionKind, - unsigned NumChildren = 0, StringRef Name = "", - std::enable_if_t::value, T> *DeclOverloadChecker = - nullptr) { +const SelectedASTNode & +checkNode(const SelectedASTNode &DeclNode, SourceSelectionKind SelectionKind, + unsigned NumChildren = 0, StringRef Name = "", + std::enable_if_t, T> *DeclOverloadChecker = + nullptr) { checkNodeImpl(isa(DeclNode.Node.get()), DeclNode, SelectionKind, NumChildren); if (!Name.empty()) diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index 712d2bd40fbbc..44cf42fa944a2 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -151,9 +151,8 @@ TEST_F(TreeTest, Iterators) { // FIXME: mutate and observe no invalidation. Mutations are private for now... 
auto It = Range.begin(); auto CIt = ConstRange.begin(); - static_assert(std::is_same::value, - "mutable range"); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "mutable range"); + static_assert(std::is_same_v, "const range"); for (unsigned I = 0; I < 3; ++I) { From ec1bd2546d343aad0d4d8d02c695493f59567553 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 4 Nov 2022 14:02:11 -0700 Subject: [PATCH 456/516] [NFC][lldb] Remove unnecessary branch in TypeSystemClang::DumpTypeDescription() Reviewed By: Michael137 Differential Revision: https://reviews.llvm.org/D137464 --- .../Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 5175ad81606d1..cd142b73ab824 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -9221,14 +9221,8 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, if (level == eDescriptionLevelVerbose) record_decl->dump(llvm_ostrm); else { - if (auto *cxx_record_decl = - llvm::dyn_cast(record_decl)) - cxx_record_decl->print(llvm_ostrm, - getASTContext().getPrintingPolicy(), - s->GetIndentLevel()); - else - record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(), - s->GetIndentLevel()); + record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(), + s->GetIndentLevel()); } } break; From 583450fa0988d1ac088d36ec840cec4f84e013c4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 31 Oct 2022 16:49:43 -0700 Subject: [PATCH 457/516] AMDGPU: Fix DivergenceAnalysis for llvm.read_register This was treating all calls as uniform by default, which is wrong if used to read a VGPR. 
--- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 27 +++- .../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 2 + .../AMDGPU/read_register.ll | 142 ++++++++++++++++++ 3 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f3310a6ec3684..483c7037acf34 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -857,6 +857,27 @@ bool GCNTTIImpl::useGPUDivergenceAnalysis() const { return !UseLegacyDA; } +bool GCNTTIImpl::isReadRegisterSourceOfDivergence( + const IntrinsicInst *ReadReg) const { + Metadata *MD = + cast(ReadReg->getArgOperand(0))->getMetadata(); + StringRef RegName = + cast(cast(MD)->getOperand(0))->getString(); + + // Special case registers that look like VCC. + MVT VT = MVT::getVT(ReadReg->getType()); + if (VT == MVT::i1) + return true; + + // Special case scalar registers that start with 'v'. + if (RegName.startswith("vcc") || RegName.empty()) + return false; + + // VGPR or AGPR is divergent. There aren't any specially named vector + // registers. + return RegName[0] == 'v' || RegName[0] == 'a'; +} + /// \returns true if the result of the value could potentially be /// different across workitems in a wavefront. bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { @@ -880,8 +901,12 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { if (isa(V) || isa(V)) return true; - if (const IntrinsicInst *Intrinsic = dyn_cast(V)) + if (const IntrinsicInst *Intrinsic = dyn_cast(V)) { + if (Intrinsic->getIntrinsicID() == Intrinsic::read_register) + return isReadRegisterSourceOfDivergence(Intrinsic); + return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID()); + } // Assume all function calls are a source of divergence. 
if (const CallInst *CI = dyn_cast(V)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 4ee785f83ba24..fb54cfd09da30 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -162,6 +162,8 @@ class GCNTTIImpl final : public BasicTTIImplBase { using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); + + bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const; bool isSourceOfDivergence(const Value *V) const; bool isAlwaysUniform(const Value *V) const; diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll new file mode 100644 index 0000000000000..91e5d588710ab --- /dev/null +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll @@ -0,0 +1,142 @@ +; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx90a -passes='print' -disable-output %s 2>&1 | FileCheck %s + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec': +; CHECK-NOT: DIVERGENT +define i64 @read_register_exec() { + %reg = call i64 @llvm.read_register.i64(metadata !0) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_m0': +; CHECK-NOT: DIVERGENT +define i32 @read_register_m0() { + %reg = call i32 @llvm.read_register.i32(metadata !1) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17': +; CHECK-NOT: DIVERGENT +define i32 @read_register_s17() { + %reg = call i32 @llvm.read_register.i32(metadata !2) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17_i17': +; CHECK-NOT: DIVERGENT +define i17 @read_register_s17_i17() { + %reg = call i17 @llvm.read_register.i17(metadata !2) + ret i17 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_v0': +; CHECK: DIVERGENT 
+define i32 @read_register_v0() { + %reg = call i32 @llvm.read_register.i32(metadata !3) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_v0_v1': +; CHECK: DIVERGENT +define i64 @read_register_v0_v1() { + %reg = call i64 @llvm.read_register.i64(metadata !4) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0': +; CHECK: DIVERGENT +define i32 @read_register_a0() { + %reg = call i32 @llvm.read_register.i32(metadata !5) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0_a1': +; CHECK: DIVERGENT +define i64 @read_register_a0_a1() { + %reg = call i64 @llvm.read_register.i64(metadata !6) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i64': +; CHECK-NOT: DIVERGENT +define i64 @read_register_vcc_i64() { + %reg = call i64 @llvm.read_register.i64(metadata !7) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i1': +; CHECK: DIVERGENT +define i1 @read_register_vcc_i1() { + %reg = call i1 @llvm.read_register.i1(metadata !7) + ret i1 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_invalid_reg': +; CHECK-NOT: DIVERGENT +define i64 @read_register_invalid_reg() { + %reg = call i64 @llvm.read_register.i64(metadata !8) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_flat_scratch': +; CHECK-NOT: DIVERGENT +define i32 @read_register_flat_scratch() { + %reg = call i32 @llvm.read_register.i32(metadata !9) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_lo_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_vcc_lo_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !10) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_hi_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_vcc_hi_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !11) + 
ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_lo_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_exec_lo_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !12) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_hi_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_exec_hi_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !13) + ret i32 %reg +} + +; FIXME: Why does the verifier allow this? +; CHECK-LABEL: Divergence Analysis' for function 'read_register_empty_str_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_empty_str_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !14) + ret i32 %reg +} + +declare i64 @llvm.read_register.i64(metadata) +declare i32 @llvm.read_register.i32(metadata) +declare i17 @llvm.read_register.i17(metadata) +declare i1 @llvm.read_register.i1(metadata) + +!0 = !{!"exec"} +!1 = !{!"m0"} +!2 = !{!"s17"} +!3 = !{!"v0"} +!4 = !{!"v[0:1]"} +!5 = !{!"a0"} +!6 = !{!"a[0:1]"} +!7 = !{!"vcc"} +!8 = !{!"not a register"} +!9 = !{!"flat_scratch"} +!10 = !{!"vcc_lo"} +!11 = !{!"vcc_hi"} +!12 = !{!"exec_lo"} +!13 = !{!"exec_hi"} +!14 = !{!""} From 28e312cbf098c05af9a1865805274cf7ab470dfd Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 3 Nov 2022 15:21:30 -0700 Subject: [PATCH 458/516] [libc][obvious] fix printf failing to stop on %\0 Previously, the printf parser would treat "%\0" as a conversion with the name "\0", and advance past the null byte causing a buffer overflow. This patch corrects that in both printf and scanf. 
Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D137367 --- libc/src/stdio/printf_core/parser.cpp | 11 +++++++++-- libc/src/stdio/scanf_core/parser.cpp | 9 ++++++++- libc/test/src/stdio/printf_core/parser_test.cpp | 13 +++++++++++++ libc/test/src/stdio/scanf_core/parser_test.cpp | 13 +++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp index 38d2e1e69cf92..6a27c1c854824 100644 --- a/libc/src/stdio/printf_core/parser.cpp +++ b/libc/src/stdio/printf_core/parser.cpp @@ -151,7 +151,11 @@ FormatSection Parser::get_next_section() { section.has_conv = false; break; } - ++cur_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the cur_pos. + if (str[cur_pos] != '\0') + ++cur_pos; + } else { // raw section section.has_conv = false; @@ -372,7 +376,10 @@ Parser::TypeDesc Parser::get_type_desc(size_t index) { if (conv_index == index) return conv_size; } - ++local_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the local_pos. + if (str[local_pos] != '\0') + ++local_pos; } // If there is no size for the requested index, then just guess that it's an diff --git a/libc/src/stdio/scanf_core/parser.cpp b/libc/src/stdio/scanf_core/parser.cpp index 31dd118ad17a7..76e658e376e05 100644 --- a/libc/src/stdio/scanf_core/parser.cpp +++ b/libc/src/stdio/scanf_core/parser.cpp @@ -74,7 +74,14 @@ FormatSection Parser::get_next_section() { section.output_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); } - ++cur_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the cur_pos and we should not count this as having a + // conversion. + if (str[cur_pos] != '\0') { + ++cur_pos; + } else { + section.has_conv = false; + } // If the format is a bracketed one, then we need to parse out the insides // of the brackets. 
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp index 0684ebc8d444d..3ae8bf47c8909 100644 --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -102,6 +102,19 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArg) { ASSERT_PFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcPrintfParserTest, EvalBadArg) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%\0abc"; + int arg1 = 12345; + evaluate(format_arr, str, arg1); + + __llvm_libc::printf_core::FormatSection expected; + expected.has_conv = false; + expected.raw_string = {str, 1}; + + ASSERT_PFORMAT_EQ(expected, format_arr[0]); +} + TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFlags) { __llvm_libc::printf_core::FormatSection format_arr[10]; const char *str = "%+-0 #d"; diff --git a/libc/test/src/stdio/scanf_core/parser_test.cpp b/libc/test/src/stdio/scanf_core/parser_test.cpp index e2ed4b026e5d0..3d2c0817e880f 100644 --- a/libc/test/src/stdio/scanf_core/parser_test.cpp +++ b/libc/test/src/stdio/scanf_core/parser_test.cpp @@ -103,6 +103,19 @@ TEST(LlvmLibcScanfParserTest, EvalOneArg) { ASSERT_SFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcScanfParserTest, EvalBadArg) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%\0abc"; + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = false; + expected.raw_string = {str, 1}; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + TEST(LlvmLibcScanfParserTest, EvalOneArgWithFlag) { __llvm_libc::scanf_core::FormatSection format_arr[10]; const char *str = "%*d"; From fdf22598862a072fe6136bc0484b5747e68da20d Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 7 Nov 2022 11:10:58 -0800 Subject: [PATCH 459/516] [NFC] Comment in MLInlineAdvisor as to why use std::map for FPICache --- llvm/include/llvm/Analysis/MLInlineAdvisor.h 
| 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index 3db948d365c77..7535464e07100 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -69,6 +69,9 @@ class MLInlineAdvisor : public InlineAdvisor { getSkipAdviceIfUnreachableCallsite(CallBase &CB); void print(raw_ostream &OS) const override; + // Using std::map to benefit from its iterator / reference non-invalidating + // semantics, which make it easy to use `getCachedFPI` results from multiple + // calls without needing to copy to avoid invalidation effects. mutable std::map FPICache; LazyCallGraph &CG; From 463da45892e2d2a262277b91b96f5f8c05dc25d0 Mon Sep 17 00:00:00 2001 From: Emilio Cota Date: Mon, 7 Nov 2022 14:21:06 -0500 Subject: [PATCH 460/516] [bazel][mlir] NVGPUToNVVM fixes for 708185f03ff --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 73b09e6fe87c3..d5f2aabf2dcaa 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4000,11 +4000,10 @@ cc_library( ":GPUDialect", ":IR", ":LLVMCommonConversion", + ":LLVMDialect", ":NVGPUDialect", ":NVVMDialect", ":Pass", - ":Transforms", - "//llvm:Support", ], ) From cafe50daf525971ffc3b8c5f2f6343d24e381384 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 4 Nov 2022 16:19:18 -0700 Subject: [PATCH 461/516] Explicitly initialize opaque pointer mode in CodeGenAction Explicitly call `LLVMContext::setOpaquePointers` in `CodeGenAction` before loading any IR files. With this we use the mode specified on the command-line rather than lazily initializing it based on the contents of the IR. 
This helps when using `-fthinlto-index` which may end up mixing files with typed and opaque pointer types which fails when the first file happened to use typed pointers since we cannot downgrade IR with opaque pointer types to typed pointer types. Differential Revision: https://reviews.llvm.org/D137475 --- clang/lib/CodeGen/CodeGenAction.cpp | 2 ++ clang/test/CodeGen/Inputs/thinlto-opaque.ll | 6 +++++ .../CodeGen/thinlto-distributed-cfi-devirt.ll | 2 +- .../test/CodeGen/thinlto-opaque-typed-mix.ll | 23 +++++++++++++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/Inputs/thinlto-opaque.ll create mode 100644 clang/test/CodeGen/thinlto-opaque-typed-mix.ll diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 52d0417a4fa6b..b723a52fbdd59 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -1102,6 +1102,8 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); SourceManager &SM = CI.getSourceManager(); + VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers); + // For ThinLTO backend invocations, ensure that the context // merges types based on ODR identifiers. We also need to read // the correct module out of a multi-module bitcode file. 
diff --git a/clang/test/CodeGen/Inputs/thinlto-opaque.ll b/clang/test/CodeGen/Inputs/thinlto-opaque.ll new file mode 100644 index 0000000000000..bd576ab830143 --- /dev/null +++ b/clang/test/CodeGen/Inputs/thinlto-opaque.ll @@ -0,0 +1,6 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--" + +define ptr @f2() { + ret ptr null +} diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll index 959d89d61ab27..2309ed717c2a2 100644 --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -100,7 +100,7 @@ cont2: ; CHECK-IR: br i1 {{.*}}, label %trap ; We still have to call it as virtual. - ; CHECK-IR: %call3 = tail call i32 %7 + ; CHECK-IR: %call3 = tail call i32 {{%[0-9]+}} %call3 = tail call i32 %8(%struct.A* nonnull %obj, i32 %call) ret i32 %call3 } diff --git a/clang/test/CodeGen/thinlto-opaque-typed-mix.ll b/clang/test/CodeGen/thinlto-opaque-typed-mix.ll new file mode 100644 index 0000000000000..1cd301f290e9b --- /dev/null +++ b/clang/test/CodeGen/thinlto-opaque-typed-mix.ll @@ -0,0 +1,23 @@ +; REQUIRES: x86-registered-target +; Test that mixing bitcode file with opaque and typed pointers works. 
+ +; RUN: mkdir -p %t +; RUN: opt -module-summary -o %t/typed.bc %s +; RUN: opt -module-summary -o %t/opaque.bc %S/Inputs/thinlto-opaque.ll +; RUN: llvm-lto2 run -thinlto-distributed-indexes %t/typed.bc %t/opaque.bc \ +; RUN: -o %t/native.o -r %t/typed.bc,main,plx -r %t/typed.bc,f2, \ +; RUN: -r %t/opaque.bc,f2,p + +; RUN: %clang_cc1 -triple x86_64-- -emit-obj -o %t/native.o %t/typed.bc \ +; RUN: -Wno-override-module \ +; RUN: -fthinlto-index=%t/typed.bc.thinlto.bc + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--" + +declare i8* @f2() + +define i32 @main() { + call i8* @f2() + ret i32 0 +} From cd8c6ab084c05b50c41163cd9f415a4c70aea8e9 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 7 Nov 2022 13:20:09 -0800 Subject: [PATCH 462/516] [NFC] clang-format part of TypePrinter.cpp Fixes weird clang-format indentation with an upcoming patch. --- clang/lib/AST/TypePrinter.cpp | 178 ++++++++++++++++------------------ 1 file changed, 86 insertions(+), 92 deletions(-) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index f6e67a805f740..f7421172731b1 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -49,109 +49,103 @@ using namespace clang; namespace { - /// RAII object that enables printing of the ARC __strong lifetime - /// qualifier. - class IncludeStrongLifetimeRAII { - PrintingPolicy &Policy; - bool Old; - - public: - explicit IncludeStrongLifetimeRAII(PrintingPolicy &Policy) - : Policy(Policy), Old(Policy.SuppressStrongLifetime) { - if (!Policy.SuppressLifetimeQualifiers) - Policy.SuppressStrongLifetime = false; - } +/// RAII object that enables printing of the ARC __strong lifetime +/// qualifier. 
+class IncludeStrongLifetimeRAII { + PrintingPolicy &Policy; + bool Old; + +public: + explicit IncludeStrongLifetimeRAII(PrintingPolicy &Policy) + : Policy(Policy), Old(Policy.SuppressStrongLifetime) { + if (!Policy.SuppressLifetimeQualifiers) + Policy.SuppressStrongLifetime = false; + } - ~IncludeStrongLifetimeRAII() { - Policy.SuppressStrongLifetime = Old; - } - }; + ~IncludeStrongLifetimeRAII() { Policy.SuppressStrongLifetime = Old; } +}; - class ParamPolicyRAII { - PrintingPolicy &Policy; - bool Old; +class ParamPolicyRAII { + PrintingPolicy &Policy; + bool Old; - public: - explicit ParamPolicyRAII(PrintingPolicy &Policy) - : Policy(Policy), Old(Policy.SuppressSpecifiers) { - Policy.SuppressSpecifiers = false; - } +public: + explicit ParamPolicyRAII(PrintingPolicy &Policy) + : Policy(Policy), Old(Policy.SuppressSpecifiers) { + Policy.SuppressSpecifiers = false; + } - ~ParamPolicyRAII() { - Policy.SuppressSpecifiers = Old; - } - }; + ~ParamPolicyRAII() { Policy.SuppressSpecifiers = Old; } +}; - class DefaultTemplateArgsPolicyRAII { - PrintingPolicy &Policy; - bool Old; +class DefaultTemplateArgsPolicyRAII { + PrintingPolicy &Policy; + bool Old; - public: - explicit DefaultTemplateArgsPolicyRAII(PrintingPolicy &Policy) - : Policy(Policy), Old(Policy.SuppressDefaultTemplateArgs) { - Policy.SuppressDefaultTemplateArgs = false; - } +public: + explicit DefaultTemplateArgsPolicyRAII(PrintingPolicy &Policy) + : Policy(Policy), Old(Policy.SuppressDefaultTemplateArgs) { + Policy.SuppressDefaultTemplateArgs = false; + } - ~DefaultTemplateArgsPolicyRAII() { - Policy.SuppressDefaultTemplateArgs = Old; - } - }; - - class ElaboratedTypePolicyRAII { - PrintingPolicy &Policy; - bool SuppressTagKeyword; - bool SuppressScope; - - public: - explicit ElaboratedTypePolicyRAII(PrintingPolicy &Policy) : Policy(Policy) { - SuppressTagKeyword = Policy.SuppressTagKeyword; - SuppressScope = Policy.SuppressScope; - Policy.SuppressTagKeyword = true; - Policy.SuppressScope = true; - } + 
~DefaultTemplateArgsPolicyRAII() { Policy.SuppressDefaultTemplateArgs = Old; } +}; - ~ElaboratedTypePolicyRAII() { - Policy.SuppressTagKeyword = SuppressTagKeyword; - Policy.SuppressScope = SuppressScope; - } - }; - - class TypePrinter { - PrintingPolicy Policy; - unsigned Indentation; - bool HasEmptyPlaceHolder = false; - bool InsideCCAttribute = false; - - public: - explicit TypePrinter(const PrintingPolicy &Policy, unsigned Indentation = 0) - : Policy(Policy), Indentation(Indentation) {} - - void print(const Type *ty, Qualifiers qs, raw_ostream &OS, - StringRef PlaceHolder); - void print(QualType T, raw_ostream &OS, StringRef PlaceHolder); - - static bool canPrefixQualifiers(const Type *T, bool &NeedARCStrongQualifier); - void spaceBeforePlaceHolder(raw_ostream &OS); - void printTypeSpec(NamedDecl *D, raw_ostream &OS); - void printTemplateId(const TemplateSpecializationType *T, raw_ostream &OS, - bool FullyQualify); - - void printBefore(QualType T, raw_ostream &OS); - void printAfter(QualType T, raw_ostream &OS); - void AppendScope(DeclContext *DC, raw_ostream &OS, - DeclarationName NameInScope); - void printTag(TagDecl *T, raw_ostream &OS); - void printFunctionAfter(const FunctionType::ExtInfo &Info, raw_ostream &OS); +class ElaboratedTypePolicyRAII { + PrintingPolicy &Policy; + bool SuppressTagKeyword; + bool SuppressScope; + +public: + explicit ElaboratedTypePolicyRAII(PrintingPolicy &Policy) : Policy(Policy) { + SuppressTagKeyword = Policy.SuppressTagKeyword; + SuppressScope = Policy.SuppressScope; + Policy.SuppressTagKeyword = true; + Policy.SuppressScope = true; + } + + ~ElaboratedTypePolicyRAII() { + Policy.SuppressTagKeyword = SuppressTagKeyword; + Policy.SuppressScope = SuppressScope; + } +}; + +class TypePrinter { + PrintingPolicy Policy; + unsigned Indentation; + bool HasEmptyPlaceHolder = false; + bool InsideCCAttribute = false; + +public: + explicit TypePrinter(const PrintingPolicy &Policy, unsigned Indentation = 0) + : Policy(Policy), 
Indentation(Indentation) {} + + void print(const Type *ty, Qualifiers qs, raw_ostream &OS, + StringRef PlaceHolder); + void print(QualType T, raw_ostream &OS, StringRef PlaceHolder); + + static bool canPrefixQualifiers(const Type *T, bool &NeedARCStrongQualifier); + void spaceBeforePlaceHolder(raw_ostream &OS); + void printTypeSpec(NamedDecl *D, raw_ostream &OS); + void printTemplateId(const TemplateSpecializationType *T, raw_ostream &OS, + bool FullyQualify); + + void printBefore(QualType T, raw_ostream &OS); + void printAfter(QualType T, raw_ostream &OS); + void AppendScope(DeclContext *DC, raw_ostream &OS, + DeclarationName NameInScope); + void printTag(TagDecl *T, raw_ostream &OS); + void printFunctionAfter(const FunctionType::ExtInfo &Info, raw_ostream &OS); #define ABSTRACT_TYPE(CLASS, PARENT) -#define TYPE(CLASS, PARENT) \ - void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \ - void print##CLASS##After(const CLASS##Type *T, raw_ostream &OS); +#define TYPE(CLASS, PARENT) \ + void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \ + void print##CLASS##After(const CLASS##Type *T, raw_ostream &OS); #include "clang/AST/TypeNodes.inc" - private: - void printBefore(const Type *ty, Qualifiers qs, raw_ostream &OS); - void printAfter(const Type *ty, Qualifiers qs, raw_ostream &OS); - }; +private: + void printBefore(const Type *ty, Qualifiers qs, raw_ostream &OS); + void printAfter(const Type *ty, Qualifiers qs, raw_ostream &OS); +}; } // namespace From d91460b1401f4d7b0e2e3ecae1238ec9bf5644c2 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 3 Nov 2022 20:10:11 +0000 Subject: [PATCH 463/516] Apply clang-tidy fixes for performance-move-const-arg in IndexOps.cpp (NFC) --- mlir/lib/Dialect/Index/IR/IndexOps.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Index/IR/IndexOps.cpp b/mlir/lib/Dialect/Index/IR/IndexOps.cpp index 241fa416eddab..2178a758b3dc2 100644 --- 
a/mlir/lib/Dialect/Index/IR/IndexOps.cpp +++ b/mlir/lib/Dialect/Index/IR/IndexOps.cpp @@ -74,7 +74,7 @@ static OpFoldResult foldBinaryOpUnchecked( return {}; assert(result->trunc(32) == calculate(lhs.getValue().trunc(32), rhs.getValue().trunc(32))); - return IntegerAttr::get(IndexType::get(lhs.getContext()), std::move(*result)); + return IntegerAttr::get(IndexType::get(lhs.getContext()), *result); } /// Fold an index operation only if the truncated 64-bit result matches the @@ -107,8 +107,7 @@ static OpFoldResult foldBinaryOpChecked( if (result64->trunc(32) != *result32) return {}; // The operation can be folded for these particular operands. - return IntegerAttr::get(IndexType::get(lhs.getContext()), - std::move(*result64)); + return IntegerAttr::get(IndexType::get(lhs.getContext()), *result64); } //===----------------------------------------------------------------------===// From 93cb2b093343ced0604fa1dee7688b8fc29543ef Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 3 Nov 2022 20:12:14 +0000 Subject: [PATCH 464/516] Apply clang-tidy fixes for performance-unnecessary-value-param in LinalgOps.cpp (NFC) --- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 3069ddce0f5be..8ce1ad070f46a 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -836,8 +836,8 @@ ParseResult GenericOp::parse(OpAsmParser &parser, OperationState &result) { static void getGenericEffectsImpl( SmallVectorImpl> &effects, - ValueRange results, OpOperandVector inputOperands, - OpOperandVector outputOperands) { + ValueRange results, const OpOperandVector &inputOperands, + const OpOperandVector &outputOperands) { for (auto *operand : inputOperands) { if (!operand->get().getType().isa()) continue; From d8233b5b4f4394b6d8fd969cb786bd702982f5cb Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 7 
Nov 2022 21:26:21 +0000 Subject: [PATCH 465/516] Fix MLIR doc build --- mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt index b737819b8d8aa..0f98a1efbcb79 100644 --- a/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt @@ -1,2 +1,2 @@ add_mlir_dialect(MathOps math) -add_mlir_doc(MathOps MathOps Dialects/ -gen-dialect-doc) +add_mlir_doc(MathOps MathOps Dialects/ -gen-dialect-doc -dialect math) From 43e52ad553cbafb78386e9e6e1204c8de8a506d4 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 4 Nov 2022 05:42:49 +0000 Subject: [PATCH 466/516] [libc] Add implementations of ftell. Reviewed By: michaelrj, lntue Differential Revision: https://reviews.llvm.org/D137395 --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/include/CMakeLists.txt | 1 + libc/spec/stdc.td | 5 ++ libc/src/__support/File/file.cpp | 23 +++++++- libc/src/__support/File/file.h | 10 +++- libc/src/__support/File/linux_file.cpp | 13 +++-- libc/src/stdio/CMakeLists.txt | 12 +++++ libc/src/stdio/fopencookie.cpp | 8 ++- libc/src/stdio/ftell.cpp | 20 +++++++ libc/src/stdio/ftell.h | 20 +++++++ libc/test/src/__support/File/file_test.cpp | 6 +-- libc/test/src/stdio/CMakeLists.txt | 19 +++++++ libc/test/src/stdio/ftell_test.cpp | 63 ++++++++++++++++++++++ 13 files changed, 191 insertions(+), 10 deletions(-) create mode 100644 libc/src/stdio/ftell.cpp create mode 100644 libc/src/stdio/ftell.h create mode 100644 libc/test/src/stdio/ftell_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 2c4867f48b1b9..8349af0e54e2a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -385,6 +385,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.fread libc.src.stdio.fread_unlocked 
libc.src.stdio.fseek + libc.src.stdio.ftell libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index ac4e57572a696..f9d3ee5164f95 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -162,6 +162,7 @@ add_gen_header( .llvm-libc-macros.stdio_macros .llvm-libc-types.cookie_io_functions_t .llvm-libc-types.FILE + .llvm-libc-types.off_t .llvm-libc-types.size_t ) diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 64ee9b7c45399..bb3f9a1c701b7 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -563,6 +563,11 @@ def StdC : StandardSpec<"stdc"> { [ArgSpec, ArgSpec] >, + FunctionSpec< + "ftell", + RetValSpec, + [ArgSpec] + >, FunctionSpec< "putc", RetValSpec, diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index 9129f68b521d4..edb2467929f2d 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -294,7 +294,28 @@ int File::seek(long offset, int whence) { // Reset the eof flag as a seek might move the file positon to some place // readable. 
eof = false; - return platform_seek(this, offset, whence); + long platform_pos = platform_seek(this, offset, whence); + if (platform_pos >= 0) + return 0; + else + return -1; +} + +long File::tell() { + FileLock lock(this); + long platform_offset; + if (eof) + platform_offset = platform_seek(this, 0, SEEK_END); + else + platform_offset = platform_seek(this, 0, SEEK_CUR); + if (platform_offset < 0) + return -1; + if (prev_op == FileOp::READ) + return platform_offset - (read_limit - pos); + else if (prev_op == FileOp::WRITE) + return platform_offset + pos; + else + return platform_offset; } int File::flush_unlocked() { diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index e08508bcb1d83..d182ea4c81b4d 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -28,7 +28,9 @@ class File { using WriteFunc = size_t(File *, const void *, size_t); using ReadFunc = size_t(File *, void *, size_t); - using SeekFunc = int(File *, long, int); + // The SeekFunc is expected to return the current offset of the external + // file position indicator. + using SeekFunc = long(File *, long, int); using CloseFunc = int(File *); using FlushFunc = int(File *); @@ -191,6 +193,8 @@ class File { int seek(long offset, int whence); + long tell(); + // If buffer has data written to it, flush it out. Does nothing if the // buffer is currently being used as a read buffer. int flush() { @@ -283,6 +287,10 @@ class File { // library. File *openfile(const char *path, const char *mode); +// The platform_file library should implement it if it is relevant for that +// platform. 
+int get_fileno(File *f); + extern File *stdin; extern File *stdout; extern File *stderr; diff --git a/libc/src/__support/File/linux_file.cpp b/libc/src/__support/File/linux_file.cpp index c6c93c8ef5086..09a880743baf4 100644 --- a/libc/src/__support/File/linux_file.cpp +++ b/libc/src/__support/File/linux_file.cpp @@ -22,7 +22,7 @@ namespace { size_t write_func(File *, const void *, size_t); size_t read_func(File *, void *, size_t); -int seek_func(File *, long, int); +long seek_func(File *, long, int); int close_func(File *); int flush_func(File *); @@ -71,10 +71,12 @@ size_t read_func(File *f, void *buf, size_t size) { return ret; } -int seek_func(File *f, long offset, int whence) { +long seek_func(File *f, long offset, int whence) { auto *lf = reinterpret_cast(f); + long result; #ifdef SYS_lseek long ret = __llvm_libc::syscall_impl(SYS_lseek, lf->get_fd(), offset, whence); + result = ret; #elif defined(SYS__llseek) long result; long ret = __llvm_libc::syscall_impl(SYS__llseek, lf->get_fd(), offset >> 32, @@ -87,7 +89,7 @@ int seek_func(File *f, long offset, int whence) { errno = -ret; return -1; } - return 0; + return result; } int close_func(File *f) { @@ -164,6 +166,11 @@ File *openfile(const char *path, const char *mode) { return file; } +int get_fileno(File *f) { + auto *lf = reinterpret_cast(f); + return lf->get_fd(); +} + constexpr size_t STDIN_BUFFER_SIZE = 512; char stdin_buffer[STDIN_BUFFER_SIZE]; static LinuxFile StdIn(0, stdin_buffer, STDIN_BUFFER_SIZE, _IOFBF, false, diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 61ca8ce34bbba..f62b095997474 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -416,6 +416,18 @@ add_entrypoint_object( libc.src.stdio.printf_core.vfprintf_internal ) +add_entrypoint_object( + ftell + SRCS + ftell.cpp + HDRS + ftell.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( remove ALIAS 
diff --git a/libc/src/stdio/fopencookie.cpp b/libc/src/stdio/fopencookie.cpp index 6facc969a2e5e..85f6de7595ce9 100644 --- a/libc/src/stdio/fopencookie.cpp +++ b/libc/src/stdio/fopencookie.cpp @@ -39,14 +39,18 @@ size_t read_func(File *f, void *data, size_t size) { reinterpret_cast(data), size); } -int seek_func(File *f, long offset, int whence) { +long seek_func(File *f, long offset, int whence) { auto cookie_file = reinterpret_cast(f); if (cookie_file->ops.seek == nullptr) { errno = EINVAL; return -1; } off64_t offset64 = offset; - return cookie_file->ops.seek(cookie_file->cookie, &offset64, whence); + int result = cookie_file->ops.seek(cookie_file->cookie, &offset64, whence); + if (result == 0) + return offset64; + else + return -1; } int close_func(File *f) { diff --git a/libc/src/stdio/ftell.cpp b/libc/src/stdio/ftell.cpp new file mode 100644 index 0000000000000..40783ac58fca4 --- /dev/null +++ b/libc/src/stdio/ftell.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of ftell -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ftell.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(long, ftell, (::FILE * stream)) { + return reinterpret_cast<__llvm_libc::File *>(stream)->tell(); +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/ftell.h b/libc/src/stdio/ftell.h new file mode 100644 index 0000000000000..95d4494709181 --- /dev/null +++ b/libc/src/stdio/ftell.h @@ -0,0 +1,20 @@ +//===-- Implementation header of ftell --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_FTELL_H +#define LLVM_LIBC_SRC_STDIO_FTELL_H + +#include + +namespace __llvm_libc { + +long ftell(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_FTELL_H diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp index 271f2dce4ef55..bdf8de3639eb6 100644 --- a/libc/test/src/__support/File/file_test.cpp +++ b/libc/test/src/__support/File/file_test.cpp @@ -26,7 +26,7 @@ class StringFile : public __llvm_libc::File { static size_t str_read(__llvm_libc::File *f, void *data, size_t len); static size_t str_write(__llvm_libc::File *f, const void *data, size_t len); - static int str_seek(__llvm_libc::File *f, long offset, int whence); + static long str_seek(__llvm_libc::File *f, long offset, int whence); static int str_close(__llvm_libc::File *f) { return 0; } static int str_flush(__llvm_libc::File *f) { return 0; } @@ -94,7 +94,7 @@ size_t StringFile::str_write(__llvm_libc::File *f, const void *data, return i; } -int StringFile::str_seek(__llvm_libc::File *f, long offset, int whence) { +long StringFile::str_seek(__llvm_libc::File *f, long offset, int whence) { StringFile *sf = static_cast(f); if (whence == SEEK_SET) sf->pos = offset; @@ -102,7 +102,7 @@ int StringFile::str_seek(__llvm_libc::File *f, long offset, int whence) { sf->pos += offset; if (whence == SEEK_END) sf->pos = SIZE + offset; - return 0; + return sf->pos; } StringFile *new_string_file(char *buffer, size_t buflen, int bufmode, diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index b453af2bb13c5..7c76c1990a685 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -235,6 +235,25 @@ add_libc_unittest( libc.src.stdio.fwrite ) 
+add_libc_unittest( + ftell_test + SUITE + libc_stdio_unittests + SRCS + ftell_test.cpp + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fflush + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fseek + libc.src.stdio.ftell + libc.src.stdio.fwrite + libc.src.stdio.setvbuf +) + add_subdirectory(printf_core) add_subdirectory(scanf_core) add_subdirectory(testdata) diff --git a/libc/test/src/stdio/ftell_test.cpp b/libc/test/src/stdio/ftell_test.cpp new file mode 100644 index 0000000000000..a788c759300ea --- /dev/null +++ b/libc/test/src/stdio/ftell_test.cpp @@ -0,0 +1,63 @@ +//===-- Unittests for ftell -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fflush.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fseek.h" +#include "src/stdio/ftell.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/setvbuf.h" +#include "utils/UnitTest/Test.h" + +#include + +class LlvmLibcFTellTest : public __llvm_libc::testing::Test { +protected: + void test_with_bufmode(int bufmode) { + constexpr char FILENAME[] = "testdata/ftell.test"; + // We will set a special buffer to the file so that we guarantee buffering. + constexpr size_t BUFFER_SIZE = 1024; + char buffer[BUFFER_SIZE]; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w+"); + ASSERT_FALSE(file == nullptr); + ASSERT_EQ(__llvm_libc::setvbuf(file, buffer, bufmode, BUFFER_SIZE), 0); + + // Include few '\n' chars to test when |bufmode| is _IOLBF. 
+ constexpr char CONTENT[] = "12\n345\n6789"; + constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; + ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); + // The above write should have buffered the written data and not have + // transferred it to the underlying stream. But, ftell operation should + // still return the correct effective offset. + ASSERT_EQ(size_t(__llvm_libc::ftell(file)), WRITE_SIZE); + + long offset = 5; + ASSERT_EQ(0, __llvm_libc::fseek(file, offset, SEEK_SET)); + ASSERT_EQ(__llvm_libc::ftell(file), offset); + ASSERT_EQ(0, __llvm_libc::fseek(file, -offset, SEEK_END)); + ASSERT_EQ(size_t(__llvm_libc::ftell(file)), size_t(WRITE_SIZE - offset)); + + ASSERT_EQ(0, __llvm_libc::fseek(file, 0, SEEK_SET)); + constexpr size_t READ_SIZE = WRITE_SIZE / 2; + char data[READ_SIZE]; + // Reading a small amount will actually read out much more data and + // buffer it. But, ftell should return the correct effective offset. + ASSERT_EQ(READ_SIZE, __llvm_libc::fread(data, 1, READ_SIZE, file)); + ASSERT_EQ(size_t(__llvm_libc::ftell(file)), READ_SIZE); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); + } +}; + +TEST_F(LlvmLibcFTellTest, TellWithFBF) { test_with_bufmode(_IOFBF); } + +TEST_F(LlvmLibcFTellTest, TellWithNBF) { test_with_bufmode(_IONBF); } + +TEST_F(LlvmLibcFTellTest, TellWithLBF) { test_with_bufmode(_IOLBF); } From 4fa00ce15c842aa8be495759723e2e2450591380 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Fri, 4 Nov 2022 17:52:21 +0000 Subject: [PATCH 467/516] [mlir][sparse] extend foreach operation to accept reduction arguments; fix sparse tensor rewriting patterns that do not propagate sparse tensor SSA properly. This patch re-commits D137468 and D137463, which were reverted by mistake. 
Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D137579 --- .../SparseTensor/IR/SparseTensorOps.td | 66 +++++---- .../SparseTensor/IR/SparseTensorDialect.cpp | 41 +++++- .../SparseTensor/Transforms/CodegenUtils.cpp | 3 + .../Transforms/SparseTensorRewriting.cpp | 130 ++++++++++++------ .../SparseTensor/convert_dense2sparse.mlir | 22 +-- .../SparseTensor/convert_sparse2sparse.mlir | 10 +- mlir/test/Dialect/SparseTensor/invalid.mlir | 45 ++++++ .../SparseTensor/rewriting_for_codegen.mlir | 11 +- mlir/test/Dialect/SparseTensor/roundtrip.mlir | 20 +++ .../SparseTensor/sparse_concat_codegen.mlir | 27 ++-- .../Dialect/SparseTensor/sparse_reshape.mlir | 104 +++++++------- 11 files changed, 329 insertions(+), 150 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 5d667448e2f37..52a6aff752792 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -896,21 +896,44 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>, def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", [SingleBlockImplicitTerminator<"YieldOp">]>, - Arguments<(ins AnyTensor:$tensor)>{ + Arguments<(ins AnyTensor:$tensor, + Variadic:$initArgs)>, + Results<(outs Variadic:$results)> { let summary = "Iterates over elements in a tensor"; let description = [{ Iterates over stored elements in a tensor (which are typically, but not always, non-zero for sparse tensors) and executes the block. - For an input tensor with rank n, the block must take n + 1 arguments. The - first n arguments must be Index type, together indicating the current coordinates - of the element being visited. The last argument must have the same type as the + For an input tensor with rank n, the block must take n + 1 (and additional loop + carried variables as described below) arguments. 
The first n arguments must be + Index type, together indicating the current coordinates of the element being visited. + The last argument must have the same type as the tensor's element type, representing the actual value loaded from the input tensor at the given coordinates. - Note that foreach generated loop iterates over the stored elements in the storage - order. However, no matter what storage order is used, the indices passed to the block - always obey the original dimension order. + `sparse_tensor.foreach` can also operate on loop-carried variables and returns + the final values after loop termination. The initial values of the variables are + passed as additional SSA operands to the "sparse_tensor.foreach" following the n + 1 + SSA values mentioned above (n coordinate and 1 value). + + The region must terminate with a "sparse_tensor.yield" that passes the current + values of all loop-carried variables to the next iteration, or to the + result, if at the last iteration. The number and static types of loop-carried + variables may not change with iterations. + + For example: + ```mlir + %c0 = arith.constant 0 : i32 + %ret = sparse_tensor.foreach in %0 init(%c0): tensor, i32 -> i32 do { + ^bb0(%arg1: index, %arg2: index, %arg3: i32, %iter: i32): + %sum = arith.add %iter, %arg3 + sparse_tensor.yield %sum + } + ``` + + It is important to note that foreach generated loop iterates over the stored elements + in the storage order. However, no matter what storage order is used, the indices passed + to the block always obey the original dimension order. 
For example: ```mlir @@ -918,10 +941,10 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", dimLevelType = [ "compressed", "compressed" ], dimOrdering = affine_map<(i,j) -> (j,i)> }> - + // foreach on a column-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #COL_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1] } @@ -931,30 +954,25 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", // foreach on a row-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #ROW_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1] } ``` - - Example: - - ```mlir - sparse_tensor.foreach in %0 : tensor do { - ^bb0(%arg1: index, %arg2: index, %arg3: f64): - do something... - } - ``` }]; let builders = [ - OpBuilder<( - ins "Value":$tensor, - "function_ref")> + OpBuilder<(ins "Value":$tensor, + "function_ref")>, + OpBuilder<(ins "Value":$tensor, + "ValueRange":$iterArgs, + "function_ref")> ]; - let regions = (region AnyRegion:$region); - let assemblyFormat = "`in` $tensor attr-dict `:` type($tensor) `do` $region"; + let regions = (region SizedRegion<1>:$region); + let assemblyFormat = "`in` $tensor (`init``(`$initArgs^`)`)? attr-dict" + " `:` type($tensor) (`,` type($initArgs)^)?" + " (`->` type($results)^)? 
`do` $region"; let hasVerifier = 1; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 6d6bd26251953..6a4177737df9f 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -581,11 +581,20 @@ LogicalResult CompressOp::verify() { void ForeachOp::build( OpBuilder &builder, OperationState &result, Value tensor, - function_ref bodyBuilder) { - build(builder, result, tensor); + function_ref + bodyBuilder) { + build(builder, result, tensor, llvm::None, bodyBuilder); +} + +void ForeachOp::build( + OpBuilder &builder, OperationState &result, Value tensor, + ValueRange initArgs, + function_ref + bodyBuilder) { + build(builder, result, initArgs.getTypes(), tensor, initArgs); + // Builds foreach body. if (!bodyBuilder) return; - auto rtp = tensor.getType().cast(); int64_t rank = rtp.getRank(); @@ -594,31 +603,49 @@ void ForeachOp::build( std::fill_n(std::back_inserter(blockArgTypes), rank, builder.getIndexType()); // Followed by one value. blockArgTypes.push_back(rtp.getElementType()); + // Followed by reduction variable. 
+ blockArgTypes.append(initArgs.getTypes().begin(), initArgs.getTypes().end()); SmallVector blockArgLocs; - std::fill_n(std::back_inserter(blockArgLocs), rank + 1, tensor.getLoc()); + std::fill_n(std::back_inserter(blockArgLocs), blockArgTypes.size(), + tensor.getLoc()); OpBuilder::InsertionGuard guard(builder); auto ®ion = *result.regions.front(); Block *bodyBlock = builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs); - bodyBuilder(builder, result.location, bodyBlock->getArguments()); + bodyBuilder(builder, result.location, + bodyBlock->getArguments().slice(0, rank), + bodyBlock->getArguments()[rank], + bodyBlock->getArguments().drop_front(rank + 1)); } LogicalResult ForeachOp::verify() { auto t = getTensor().getType().cast(); auto args = getBody()->getArguments(); - if (static_cast(t.getRank()) + 1 != args.size()) + if (static_cast(t.getRank()) + 1 + getInitArgs().size() != + args.size()) return emitError("Unmatched number of arguments in the block"); + if (getNumResults() != getInitArgs().size()) + return emitError("Mismatch in number of init arguments and results"); + + if (getResultTypes() != getInitArgs().getTypes()) + return emitError("Mismatch in types of init arguments and results"); + + auto yield = cast(getBody()->getTerminator()); + if (yield.getNumOperands() != getNumResults() || + yield.getOperands().getTypes() != getResultTypes()) + return emitError("Mismatch in types of yield values and results"); + for (int64_t i = 0, e = t.getRank(); i < e; i++) if (args[i].getType() != IndexType::get(getContext())) emitError( llvm::formatv("Expecting Index type for argument at index {0}", i)); auto elemTp = t.getElementType(); - auto valueTp = args.back().getType(); + auto valueTp = args[t.getRank()].getType(); if (elemTp != valueTp) emitError(llvm::formatv("Unmatched element type between input tensor and " "block argument, expected:{0}, got: {1}", diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp 
b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index fc240b0b10c08..fcddcd27ed40b 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -956,6 +956,9 @@ Value mlir::sparse_tensor::genValueForDense(OpBuilder &builder, Location loc, return val; } +// FIXME: +// 1. Dense tensors loop should be generated by loop emitter. +// 2. Support reduction variables to propagate SSA chains properly. void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop( OpBuilder &builder, Location loc, Value src, unsigned rank, function_ref bodyBuilder) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 9c002f1ae0ec8..d0613c09503c0 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -356,8 +356,10 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { RankedTensorType cooTp = getUnorderedCOOFromType(dstTp); auto cooBuffer = rewriter.create(loc, cooTp, dstDynSizes).getResult(); - rewriter.create( - loc, srcTensor, [&](OpBuilder &builder, Location loc, ValueRange args) { + ForeachOp foreachOp = rewriter.create( + loc, srcTensor, cooBuffer, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector srcIndices; SmallVector dstIndices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { @@ -366,11 +368,11 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { } translateIndicesArray(builder, loc, op.getReassociationIndices(), srcIndices, srcSizes, dstSizes, dstIndices); - builder.create(loc, args.back(), cooBuffer, dstIndices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), dstIndices); + builder.create(loc, t); }); - - rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); + auto t = 
rewriter.create(loc, foreachOp.getResult(0), true); + rewriter.replaceOpWithNewOp(op, dstTp, t); return success(); } }; @@ -440,13 +442,16 @@ struct ConcatenateRewriter : public OpRewritePattern { rewriter.create(loc, cooTp, ValueRange()).getResult(); Value offset = constantIndex(rewriter, loc, 0); + ForeachOp foreachOp; for (Value input : op.getInputs()) { // Builds the indexing map. // Build a for op for each input tensor to append new values into the // output tensor. - rewriter.create( - loc, input, [&](OpBuilder &builder, Location loc, ValueRange args) { + foreachOp = rewriter.create( + loc, input, cooBuffer, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0; i < rank; i++) { uint64_t dim = @@ -457,8 +462,8 @@ struct ConcatenateRewriter : public OpRewritePattern { idx = builder.create(loc, idx, offset); indices.push_back(idx); } - builder.create(loc, args.back(), cooBuffer, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); // Accumulates the offset. 
Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -467,7 +472,10 @@ struct ConcatenateRewriter : public OpRewritePattern { assert(!ShapedType::isDynamic(d)); offset = rewriter.create(loc, offset, constantIndex(rewriter, loc, d)); + cooBuffer = foreachOp.getResult(0); } + + cooBuffer = rewriter.create(loc, cooBuffer, true); rewriter.replaceOpWithNewOp(op, rtp, cooBuffer); return success(); } @@ -558,12 +566,13 @@ struct ConvertRewriter : public OpRewritePattern { sizesForTensor(rewriter, sizes, loc, srcTp, src); Value dst = allocDenseTensor(rewriter, loc, dstTp, sizes); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { - builder.create(loc, args.back(), dst, - args.drop_back()); - builder.create(loc); - }); + rewriter.create(loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, + ValueRange args, Value v, ValueRange reduc) { + builder.create(loc, v, dst, + args); + builder.create(loc); + }); rewriter.replaceOpWithNewOp(op, dstTp, dst); return success(); @@ -597,17 +606,19 @@ struct ConvertRewriter : public OpRewritePattern { srcTp = getUnorderedCOOFromType(srcTp); tmpCoo = rewriter.create(loc, srcTp, dynSrcSizes).getResult(); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + auto foreachOp = rewriter.create( + loc, src, tmpCoo, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { uint64_t dim = toStoredDim(encSrc, i); indices.push_back(args[dim]); } - builder.create(loc, args.back(), tmpCoo, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); - src = tmpCoo; + src = rewriter.create(loc, foreachOp.getResult(0), true); } // Sort the COO tensor so that its elements are ordered via increasing @@ -646,27 +657,31 @@ struct ConvertRewriter 
: public OpRewritePattern { getDynamicSizes(dstTp, srcSizes, dynDstSizes); Value dst = rewriter.create(loc, dstTp, dynDstSizes).getResult(); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + auto foreachOp = rewriter.create( + loc, src, dst, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { uint64_t dim = toStoredDim(encDst, i); indices.push_back(args[dim]); } - builder.create(loc, args.back(), dst, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); - // Release the temporary COO if it is created. + // Release the temporary COO if it is created. Note that tmpCoo is + // invalidated due to foreach and updated to src. if (tmpCoo) - rewriter.create(loc, tmpCoo); + rewriter.create(loc, src); // Directly replace op with dst results in bufferization error message // "sparse tensor allocation should not escape function". // As such, we insert a trivial tensor convert which will be removed by // codegen. rewriter.setInsertionPointAfter(op); - rewriter.replaceOpWithNewOp(op, dstTp, dst); + auto t = rewriter.create(loc, foreachOp.getResult(0), true); + rewriter.replaceOpWithNewOp(op, dstTp, t); return success(); } }; @@ -685,6 +700,8 @@ struct ForeachRewriter : public OpRewritePattern { int64_t rank = rtp.getRank(); auto enc = getSparseTensorEncoding(rtp); + SmallVector reduc = op.getInitArgs(); + // 1. Generates loop for the sparse input. SparseTensorLoopEmitter loopEmitter(ValueRange{input}); loopEmitter.initializeLoopEmit(rewriter, loc); @@ -692,7 +709,9 @@ struct ForeachRewriter : public OpRewritePattern { // TODO: provide utility function for loop sequences that only contains // one for loop? 
loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast(i)); - loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i); + // Note that reduc will be taken care of by loop emitter and get updated + // in place. + loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i, reduc); } SmallVector coords; @@ -707,15 +726,7 @@ struct ForeachRewriter : public OpRewritePattern { : rewriter.create(loc, vals, coords); // 2. Inline the block in the foreach operator. - Block::iterator inlinePos = rewriter.getInsertionPoint(); Block *srcBlock = op.getBody(); - // Remove sparse_tensor.yield. - rewriter.eraseOp(srcBlock->getTerminator()); - - for (int64_t i = 0; i < rank; i++) { - loopEmitter.exitCurrentLoop(rewriter, loc); - loopEmitter.exitCurrentLoopSeq(); - } SmallVector args; // Remap coordinates. @@ -725,11 +736,33 @@ struct ForeachRewriter : public OpRewritePattern { } // Remap value. args.push_back(val); + // Remap reduction variables. + args.append(reduc); + + // Remove sparse_tensor.yield. + SmallVector reducValue = srcBlock->getTerminator()->getOperands(); + rewriter.eraseOp(srcBlock->getTerminator()); // Inline body. - rewriter.mergeBlockBefore(srcBlock, &*inlinePos, args); - // delete the foreach operator. - rewriter.eraseOp(op); + if (!reducValue.empty()) { + rewriter.mergeBlocks(srcBlock, rewriter.getBlock(), args); + } else { + // This is annoying, since scf.for inserts an implicit yield op when + // there is no reduction variable upon creation, in this case we need to + // merge the block *before* the yield op. + rewriter.mergeBlockBefore(srcBlock, &*rewriter.getInsertionPoint(), args); + } + + for (int64_t i = 0; i < rank; i++) { + // Link the reduction chain. Note that loop emitter updates the reducValue + // in place. + loopEmitter.exitCurrentLoop(rewriter, loc, reducValue); + loopEmitter.exitCurrentLoopSeq(); + } + + // Replace the foreach operator with the value returned by the outermost + // for loop. 
+ rewriter.replaceOp(op, reducValue); return success(); } }; @@ -792,7 +825,8 @@ struct NewRewriter : public OpRewritePattern { .getResult(0); Type eltTp = dstTp.getElementType(); Value value = genAllocaScalar(rewriter, loc, eltTp); - scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1); + scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1, + ArrayRef(cooBuffer)); rewriter.setInsertionPointToStart(forOp.getBody()); SmallString<18> getNextFuncName{"getSparseTensorReaderNext", @@ -807,13 +841,17 @@ struct NewRewriter : public OpRewritePattern { loc, indices, constantIndex(rewriter, loc, i))); } Value v = rewriter.create(loc, value); - rewriter.create(loc, v, cooBuffer, indicesArray); + auto t = rewriter.create(loc, v, forOp.getRegionIterArg(0), + indicesArray); + rewriter.create(loc, ArrayRef(t)); rewriter.setInsertionPointAfter(forOp); + // Link SSA chain. + cooBuffer = forOp.getResult(0); // Release the sparse tensor reader. createFuncCall(rewriter, loc, "delSparseTensorReader", {}, {reader}, EmitCInterface::Off); - + cooBuffer = rewriter.create(loc, cooBuffer, true); Value newOp = rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); // Release the unordered COO tensor buffer. @@ -866,12 +904,14 @@ struct OutRewriter : public OpRewritePattern { ModuleOp module = op->getParentOfType(); // For each element in the source tensor, output the element. 
rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { for (uint64_t i = 0; i < rank; i++) { rewriter.create(loc, args[i], indices, constantIndex(builder, loc, i)); } - rewriter.create(loc, args.back(), value); + rewriter.create(loc, v, value); SmallVector operands{writer, rankValue, indices, value}; FlatSymbolRefAttr fn = getFunc(module, outNextFuncName, {}, operands, EmitCInterface::On); diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir index d67e11b92dd9c..cb1f16ef2cd20 100644 --- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir @@ -116,6 +116,7 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V:.*]] = tensor.extract %[[A]]{{\[}}%[[FI]], %[[FJ]]] : tensor<2x4xf64> // CHECK-RWT: %[[NZ:.*]] = arith.cmpf une, %[[V]], %[[F0]] : f64 // CHECK-RWT: scf.if %[[NZ]] { +// // FIXME: the SSA chain is broken here! 
// CHECK-RWT: %{{.*}} = sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[FI]], %[[FJ]]] // CHECK-RWT: } // CHECK-RWT: } @@ -126,11 +127,13 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V2:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]], %[[I1]] jointly %[[V2]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: sparse_tensor.foreach in %[[COO]] -// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): -// CHECK-RWT: sparse_tensor.insert %[[FV]] into %[[DST]]{{\[}}%[[FI0]], %[[FI1]]] +// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64, %[[R0:.*]]: tensor +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.insert %[[FV]] into %[[R0]]{{\[}}%[[FI0]], %[[FI1]]] +// CHECK-RWT: sparse_tensor.yield %[[RET]] // CHECK-RWT: } -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.load %[[NEW_T]] hasInserts +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[NT]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { @@ -179,6 +182,7 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[I1r:.*]] = tensor.extract %[[SI]]{{\[}}%[[FI]], %[[C1]]] : tensor<2x2xi64> // CHECK-RWT: %[[I1:.*]] = arith.index_cast %[[I1r]] : i64 to index // CHECK-RWT: %[[V:.*]] = tensor.extract %[[SV]]{{\[}}%[[FI]]] : tensor<2xf32> +// // FIXME: the SSA chain is broken here! 
// CHECK-RWT: sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[I0]], %[[I1]]] // CHECK-RWT: } // CHECK-RWT: %[[TI0:.*]] = sparse_tensor.indices %[[COO]] {dimension = 0 : index} @@ -187,11 +191,13 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[TV:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[TI0]], %[[TI1]] jointly %[[TV]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: sparse_tensor.foreach in %[[COO]] -// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32): -// CHECK-RWT: sparse_tensor.insert %[[F2V]] into %[[DST]]{{\[}}%[[F2I0]], %[[F2I1]]] +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32, %[[R0:.*]]: tensor +// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.insert %[[F2V]] into %[[R0]]{{\[}}%[[F2I0]], %[[F2I1]]] +// CHECK-RWT: sparse_tensor.yield %[[NEW_T]] // CHECK-RWT: } -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] +// CHECK-RWT: %[[T:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[T]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<8x7xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{ diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir index 92f9e46b90938..17145f8d37380 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir @@ -94,11 +94,13 @@ func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[A]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]] jointly %[[V]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor(%[[D]]) -// 
CHECK-RWT: sparse_tensor.foreach in %[[A]] -// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32): -// CHECK-RWT: sparse_tensor.insert %[[FV2]] into %[[DST]]{{\[}}%[[FI2]]] +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[A]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32, %[[T:.*]]: tensor> func.func @sparse_convert(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index dd27ce398c203..02fb97bc866c6 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -551,6 +551,51 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { // ----- +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { + // expected-error@+1 {{Unmatched element type between input tensor and block argument}} + sparse_tensor.foreach in %arg0 : tensor<2x4xf64, #DCSR> do { + ^bb0(%1: index, %2: index, %v: f32) : + } + return +} + +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in number of init arguments and results}} + sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 do { + ^bb0(%1: index, %2: index, %v: f32, %r1 : i32) : + } + return +} + +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in types of init arguments and results}} + %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> i32 do { + ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : + } + return +} + +// ----- + +#DCSR = 
#sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in types of yield values and results}} + %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> f32 do { + ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : + sparse_tensor.yield %1 : index + } + return +} + +// ----- + // TODO: a test case with empty xs doesn't work due to some parser issues. func.func @sparse_sort_x_type( %arg0: index, %arg1: memref) { diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir index 79b616dec8304..3a6cf999df90a 100644 --- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir @@ -18,18 +18,19 @@ // CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) // CHECK: %[[VB:.*]] = memref.alloca() -// CHECK: scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] { +// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) // CHECK: %[[E0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[E1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[V:.*]] = memref.load %[[VB]][] -// CHECK: sparse_tensor.insert %[[V]] into %[[T]]{{\[}}%[[E0]], %[[E1]]] +// CHECK: %[[T1:.*]] = sparse_tensor.insert %[[V]] into %[[A2]]{{\[}}%[[E0]], %[[E1]]] +// CHECK: scf.yield %[[T1]] // CHECK: } // CHECK: call @delSparseTensorReader(%[[R]]) -// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T]] -// CHECK: bufferization.dealloc_tensor %[[T]] +// CHECK: %[[T3:.*]] = sparse_tensor.load %[[T2]] hasInserts +// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T3]] +// CHECK: bufferization.dealloc_tensor %[[T3]] // CHECK: return %[[R]] -// 
CHECK: } func.func @sparse_new(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index 0ef58db148525..bc664ae3d2d00 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -411,6 +411,26 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { return } +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> + +// CHECK-LABEL: func @sparse_tensor_foreach( +// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[A1:.*]]: f32 +// CHECK-NEXT: %[[RET:.*]] = sparse_tensor.foreach in %[[A0]] init(%[[A1]]) +// CHECK-NEXT: ^bb0(%[[TMP_1:.*]]: index, %[[TMP_2:.*]]: index, %[[TMP_v:.*]]: f64, %[[TMP_r:.*]]: f32) +// CHECK: sparse_tensor.yield %[[TMP_r]] : f32 +// CHECK: } +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + %ret = sparse_tensor.foreach in %arg0 init(%arg1): tensor<2x4xf64, #DCSR>, f32 -> f32 + do { + ^bb0(%1: index, %2: index, %v: f64, %r: f32) : + sparse_tensor.yield %r : f32 + } + return +} + // ---- // CHECK-LABEL: func @sparse_sort_1d0v( diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir index 7280c6f5e7ba3..717819bd0cb16 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir @@ -19,16 +19,18 @@ // CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] { +// CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = 
%[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_1:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_1]] // CHECK: } +// CHECK: scf.yield %[[RET_4]] // CHECK: } // CHECK: %[[TMP_8:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_9:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor @@ -37,17 +39,19 @@ // CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] { +// CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi 
%[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_5:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A3:.*]] = %[[A2]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_2:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_2]] // CHECK: } +// CHECK: scf.yield %[[RET_5]] // CHECK: } // CHECK: %[[TMP_15:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_16:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor @@ -56,19 +60,22 @@ // CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] { +// CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_16]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: 
%[[RET_6:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A5:.*]] = %[[A4]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_3:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_3]] // CHECK: } +// CHECK: scf.yield %[[RET_6]] // CHECK: } -// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_0]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts +// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_23]] : tensor<9x4xf64, #sparse_tensor // CHECK: return %[[TMP_22]] : tensor<9x4xf64, #sparse_tensor func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index c162bacffac96..94ee50197fa9c 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -52,14 +52,16 @@ // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]]) // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[DI0:.*]] = arith.divui %[[SI]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = 
arith.remui %[[SI]], %[[C10]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: scf.yield %[[NT]] // CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor<10x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> // func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10xf64, #SparseMatrix> { @@ -111,25 +113,28 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor() // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { -// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index -// CHECK-RWT:
%[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] -// CHECK-RWT } -// CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[A0:.*]] = %[[B]]) +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[A1:.*]] = %[[A0]]) +// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index +// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index +// CHECK-RWT: %[[R1:.*]] = sparse_tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]] +// CHECK-RWT: scf.yield %[[R1]] +// CHECK-RWT: } +// CHECK-RWT: scf.yield %[[RET_1]] +// CHECK-RWT: } +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor<100xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> // func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<100xf64, #SparseVector> { @@ -191,7 +196,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[V:.*]]
= sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]]) // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[T1:.*]] = arith.muli %[[DD0]], %[[C10]] : index @@ -200,9 +205,11 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[T3:.*]] = arith.remui %[[SI]], %[[T2]] : index // CHECK-RWT: %[[T4:.*]] = arith.divui %[[T2]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = arith.divui %[[T3]], %[[T4]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: scf.yield %[[NT]] // CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor> // func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor { @@ -260,28 +267,31 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor(%[[DD0]]) // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = memref.load 
%[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { -// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index -// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index -// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index -// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index -// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] -// CHECK-RWT } -// CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R0:.*]] = %[[B]]) +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[R1:.*]] = %[[R0]]) +// CHECK-RWT: %[[SI1:.*]] = memref.load 
%[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index +// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index +// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index +// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index +// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]] +// CHECK-RWT: scf.yield %[[NT]] +// CHECK-RWT: } +// CHECK-RWT: scf.yield %[[RET_1]] +// CHECK-RWT: } +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor> // func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor { From 6a6101958a9ef3dd076f3c65b5d7b200314c0a8c Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Sun, 6 Nov 2022 06:25:15 +0000 Subject: [PATCH 468/516] [libc] Add implementation of getc, getc_unlocked and fgetc_unlocked.
Reviewed By: michaelrj Differential Revision: https://reviews.llvm.org/D137507 --- libc/config/linux/x86_64/entrypoints.txt | 3 + libc/spec/gnu_ext.td | 5 ++ libc/spec/posix.td | 5 ++ libc/spec/stdc.td | 5 ++ libc/src/stdio/CMakeLists.txt | 36 +++++++++++ libc/src/stdio/fgetc_unlocked.cpp | 25 ++++++++ libc/src/stdio/fgetc_unlocked.h | 20 ++++++ libc/src/stdio/getc.cpp | 24 ++++++++ libc/src/stdio/getc.h | 20 ++++++ libc/src/stdio/getc_unlocked.cpp | 25 ++++++++ libc/src/stdio/getc_unlocked.h | 20 ++++++ libc/test/src/stdio/CMakeLists.txt | 23 +++++++ libc/test/src/stdio/fgetc_test.cpp | 66 +++++++++++--------- libc/test/src/stdio/fgetc_unlocked_test.cpp | 67 +++++++++++++++++++++ 14 files changed, 317 insertions(+), 27 deletions(-) create mode 100644 libc/src/stdio/fgetc_unlocked.cpp create mode 100644 libc/src/stdio/fgetc_unlocked.h create mode 100644 libc/src/stdio/getc.cpp create mode 100644 libc/src/stdio/getc.h create mode 100644 libc/src/stdio/getc_unlocked.cpp create mode 100644 libc/src/stdio/getc_unlocked.h create mode 100644 libc/test/src/stdio/fgetc_unlocked_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 8349af0e54e2a..2544ff2a5e40a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -376,6 +376,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.ferror libc.src.stdio.ferror_unlocked libc.src.stdio.fgetc + libc.src.stdio.fgetc_unlocked libc.src.stdio.fgets libc.src.stdio.fflush libc.src.stdio.fopen @@ -390,6 +391,8 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked libc.src.stdio.fprintf + libc.src.stdio.getc + libc.src.stdio.getc_unlocked libc.src.stdio.printf libc.src.stdio.putc libc.src.stdio.putchar diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td index ebadbb6f12c4c..239790bb9bdc5 100644 --- a/libc/spec/gnu_ext.td +++ b/libc/spec/gnu_ext.td @@ -137,6 +137,11 @@ def GnuExtensions : 
StandardSpec<"GNUExtensions"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "fgetc_unlocked", + RetValSpec, + [ArgSpec] + >, ] >; diff --git a/libc/spec/posix.td b/libc/spec/posix.td index 43b3319059c98..0333c6e47c4d3 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -1011,6 +1011,11 @@ def POSIX : StandardSpec<"POSIX"> { RetValSpec, [ArgSpec] >, + FunctionSpec< + "getc_unlocked", + RetValSpec, + [ArgSpec] + >, ] >; diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index bb3f9a1c701b7..4d1295313e5ae 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -568,6 +568,11 @@ def StdC : StandardSpec<"stdc"> { RetValSpec, [ArgSpec] >, + FunctionSpec< + "getc", + RetValSpec, + [ArgSpec] + >, FunctionSpec< "putc", RetValSpec, diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index f62b095997474..d4b39767473e5 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -113,6 +113,42 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + fgetc_unlocked + SRCS + fgetc_unlocked.cpp + HDRS + fgetc_unlocked.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + getc + SRCS + getc.cpp + HDRS + getc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + getc_unlocked + SRCS + getc_unlocked.cpp + HDRS + getc_unlocked.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( fgets SRCS diff --git a/libc/src/stdio/fgetc_unlocked.cpp b/libc/src/stdio/fgetc_unlocked.cpp new file mode 100644 index 0000000000000..d61493b5bba10 --- /dev/null +++ b/libc/src/stdio/fgetc_unlocked.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of fgetc_unlocked ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fgetc_unlocked.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, fgetc_unlocked, (::FILE * stream)) { + unsigned char c; + size_t r = + reinterpret_cast<__llvm_libc::File *>(stream)->read_unlocked(&c, 1); + if (r != 1) + return EOF; + return c; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/fgetc_unlocked.h b/libc/src/stdio/fgetc_unlocked.h new file mode 100644 index 0000000000000..e374a6d0f6ce1 --- /dev/null +++ b/libc/src/stdio/fgetc_unlocked.h @@ -0,0 +1,20 @@ +//===-- Implementation header of fgetc_unlocked -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_FGETC_UNLOCKED_H +#define LLVM_LIBC_SRC_STDIO_FGETC_UNLOCKED_H + +#include + +namespace __llvm_libc { + +int fgetc_unlocked(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_FGETC_UNLOCKED_H diff --git a/libc/src/stdio/getc.cpp b/libc/src/stdio/getc.cpp new file mode 100644 index 0000000000000..406e83f2b3627 --- /dev/null +++ b/libc/src/stdio/getc.cpp @@ -0,0 +1,24 @@ +//===-- Implementation of getc --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/getc.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, getc, (::FILE * stream)) { + unsigned char c; + size_t r = reinterpret_cast<__llvm_libc::File *>(stream)->read(&c, 1); + if (r != 1) + return EOF; + return c; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/getc.h b/libc/src/stdio/getc.h new file mode 100644 index 0000000000000..b5de6a7585807 --- /dev/null +++ b/libc/src/stdio/getc.h @@ -0,0 +1,20 @@ +//===-- Implementation header of getc ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_GETC_H +#define LLVM_LIBC_SRC_STDIO_GETC_H + +#include + +namespace __llvm_libc { + +int getc(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_GETC_H diff --git a/libc/src/stdio/getc_unlocked.cpp b/libc/src/stdio/getc_unlocked.cpp new file mode 100644 index 0000000000000..48adba5ff4118 --- /dev/null +++ b/libc/src/stdio/getc_unlocked.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of getc_unlocked ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/getc_unlocked.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, getc_unlocked, (::FILE * stream)) { + unsigned char c; + size_t r = + reinterpret_cast<__llvm_libc::File *>(stream)->read_unlocked(&c, 1); + if (r != 1) + return EOF; + return c; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/getc_unlocked.h b/libc/src/stdio/getc_unlocked.h new file mode 100644 index 0000000000000..b318dfc934e90 --- /dev/null +++ b/libc/src/stdio/getc_unlocked.h @@ -0,0 +1,20 @@ +//===-- Implementation header of getc_unlocked ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_GETC_UNLOCKED_H +#define LLVM_LIBC_SRC_STDIO_GETC_UNLOCKED_H + +#include + +namespace __llvm_libc { + +int getc_unlocked(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_GETC_UNLOCKED_H diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 7c76c1990a685..f27d7bba1c562 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -216,6 +216,29 @@ add_libc_unittest( libc.src.stdio.fgetc libc.src.stdio.fopen libc.src.stdio.fwrite + libc.src.stdio.getc +) + +add_libc_unittest( + fgetc_unlocked_test + SUITE + libc_stdio_unittests + SRCS + fgetc_unlocked_test.cpp + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.ferror + libc.src.stdio.ferror_unlocked + libc.src.stdio.feof + libc.src.stdio.feof_unlocked + libc.src.stdio.fgetc_unlocked + 
libc.src.stdio.flockfile + libc.src.stdio.fopen + libc.src.stdio.funlockfile + libc.src.stdio.fwrite + libc.src.stdio.getc_unlocked ) add_libc_unittest( diff --git a/libc/test/src/stdio/fgetc_test.cpp b/libc/test/src/stdio/fgetc_test.cpp index 50d1780840de4..5be141ecd202e 100644 --- a/libc/test/src/stdio/fgetc_test.cpp +++ b/libc/test/src/stdio/fgetc_test.cpp @@ -13,38 +13,50 @@ #include "src/stdio/fgetc.h" #include "src/stdio/fopen.h" #include "src/stdio/fwrite.h" +#include "src/stdio/getc.h" #include "utils/UnitTest/Test.h" #include #include -TEST(LlvmLibcFGetCTest, WriteAndReadCharacters) { - constexpr char FILENAME[] = "testdata/fgetc.test"; - ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); - ASSERT_FALSE(file == nullptr); - constexpr char CONTENT[] = "123456789"; - constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; - ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); - // This is a write-only file so reads should fail. - ASSERT_EQ(__llvm_libc::fgetc(file), EOF); - // This is an error and not a real EOF. - ASSERT_EQ(__llvm_libc::feof(file), 0); - ASSERT_NE(__llvm_libc::ferror(file), 0); - errno = 0; - - ASSERT_EQ(0, __llvm_libc::fclose(file)); - - file = __llvm_libc::fopen(FILENAME, "r"); - ASSERT_FALSE(file == nullptr); - - for (size_t i = 0; i < WRITE_SIZE; ++i) { - int c = __llvm_libc::fgetc(file); - ASSERT_EQ(c, int('1' + i)); +class LlvmLibcGetcTest : public __llvm_libc::testing::Test { +public: + using GetcFunc = int(FILE *); + void test_with_func(GetcFunc *func, const char *filename) { + ::FILE *file = __llvm_libc::fopen(filename, "w"); + ASSERT_FALSE(file == nullptr); + constexpr char CONTENT[] = "123456789"; + constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; + ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); + // This is a write-only file so reads should fail. + ASSERT_EQ(func(file), EOF); + // This is an error and not a real EOF. 
+ ASSERT_EQ(__llvm_libc::feof(file), 0); + ASSERT_NE(__llvm_libc::ferror(file), 0); + errno = 0; + + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(filename, "r"); + ASSERT_FALSE(file == nullptr); + + for (size_t i = 0; i < WRITE_SIZE; ++i) { + int c = func(file); + ASSERT_EQ(c, int('1' + i)); + } + // Reading more should return EOF but not set error. + ASSERT_EQ(func(file), EOF); + ASSERT_NE(__llvm_libc::feof(file), 0); + ASSERT_EQ(__llvm_libc::ferror(file), 0); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); } - // Reading more should return EOF but not set error. - ASSERT_EQ(__llvm_libc::fgetc(file), EOF); - ASSERT_NE(__llvm_libc::feof(file), 0); - ASSERT_EQ(__llvm_libc::ferror(file), 0); +}; + +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithFgetc) { + test_with_func(&__llvm_libc::fgetc, "testdata/fgetc.test"); +} - ASSERT_EQ(0, __llvm_libc::fclose(file)); +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithGetc) { + test_with_func(&__llvm_libc::getc, "testdata/getc.test"); } diff --git a/libc/test/src/stdio/fgetc_unlocked_test.cpp b/libc/test/src/stdio/fgetc_unlocked_test.cpp new file mode 100644 index 0000000000000..2687b2231afd5 --- /dev/null +++ b/libc/test/src/stdio/fgetc_unlocked_test.cpp @@ -0,0 +1,67 @@ +//===-- Unittests for fgetc -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/feof.h" +#include "src/stdio/feof_unlocked.h" +#include "src/stdio/ferror.h" +#include "src/stdio/ferror_unlocked.h" +#include "src/stdio/fgetc_unlocked.h" +#include "src/stdio/flockfile.h" +#include "src/stdio/fopen.h" +#include "src/stdio/funlockfile.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/getc_unlocked.h" +#include "utils/UnitTest/Test.h" + +#include +#include + +class LlvmLibcGetcTest : public __llvm_libc::testing::Test { +public: + using GetcFunc = int(FILE *); + void test_with_func(GetcFunc *func, const char *filename) { + ::FILE *file = __llvm_libc::fopen(filename, "w"); + ASSERT_FALSE(file == nullptr); + constexpr char CONTENT[] = "123456789"; + constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; + ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); + // This is a write-only file so reads should fail. + ASSERT_EQ(func(file), EOF); + // This is an error and not a real EOF. + ASSERT_EQ(__llvm_libc::feof(file), 0); + ASSERT_NE(__llvm_libc::ferror(file), 0); + errno = 0; + + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(filename, "r"); + ASSERT_FALSE(file == nullptr); + + __llvm_libc::flockfile(file); + for (size_t i = 0; i < WRITE_SIZE; ++i) { + int c = func(file); + ASSERT_EQ(c, int('1' + i)); + } + // Reading more should return EOF but not set error. 
+ ASSERT_EQ(func(file), EOF); + ASSERT_NE(__llvm_libc::feof_unlocked(file), 0); + ASSERT_EQ(__llvm_libc::ferror_unlocked(file), 0); + + __llvm_libc::funlockfile(file); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + } +}; + +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithFgetcUnlocked) { + test_with_func(&__llvm_libc::fgetc_unlocked, "testdata/fgetc_unlocked.test"); +} + +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithGetcUnlocked) { + test_with_func(&__llvm_libc::getc_unlocked, "testdata/getc_unlocked.test"); +} From 0c8db885f62713af06116fc02cf19b4e0ba701f4 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Wed, 19 Oct 2022 13:33:05 -0700 Subject: [PATCH 469/516] [libc] add scanf reader This is the interface that will be used to read from a file or string in scanf. This patch also adds the string and file implementations of the reader, although the file reader is not yet complete since ungetc has not yet been implemented. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D137117 --- libc/src/stdio/scanf_core/CMakeLists.txt | 35 ++++++++++ libc/src/stdio/scanf_core/file_reader.cpp | 26 ++++++++ libc/src/stdio/scanf_core/file_reader.h | 38 +++++++++++ libc/src/stdio/scanf_core/reader.cpp | 35 ++++++++++ libc/src/stdio/scanf_core/reader.h | 49 ++++++++++++++ libc/src/stdio/scanf_core/string_reader.cpp | 24 +++++++ libc/src/stdio/scanf_core/string_reader.h | 33 ++++++++++ libc/test/src/stdio/scanf_core/CMakeLists.txt | 18 +++++ .../stdio/scanf_core/string_reader_test.cpp | 66 +++++++++++++++++++ 9 files changed, 324 insertions(+) create mode 100644 libc/src/stdio/scanf_core/file_reader.cpp create mode 100644 libc/src/stdio/scanf_core/file_reader.h create mode 100644 libc/src/stdio/scanf_core/reader.cpp create mode 100644 libc/src/stdio/scanf_core/reader.h create mode 100644 libc/src/stdio/scanf_core/string_reader.cpp create mode 100644 libc/src/stdio/scanf_core/string_reader.h create mode 100644 
libc/test/src/stdio/scanf_core/string_reader_test.cpp diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt index 3941d40a838c7..91cf5e2ada907 100644 --- a/libc/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/src/stdio/scanf_core/CMakeLists.txt @@ -23,3 +23,38 @@ add_object_library( libc.src.__support.CPP.bitset libc.src.__support.CPP.string_view ) + +if(NOT (TARGET libc.src.__support.File.file)) + # Not all platforms have a file implementation. If file is unavailable, + # then we must skip all the parts that need file. + return() +endif() + +add_object_library( + string_reader + SRCS + string_reader.cpp + HDRS + string_reader.h +) + +add_object_library( + file_reader + SRCS + file_reader.cpp + HDRS + file_reader.h + DEPENDS + libc.src.__support.File.file +) + +add_object_library( + reader + SRCS + reader.cpp + HDRS + reader.h + DEPENDS + .string_reader + .file_reader +) diff --git a/libc/src/stdio/scanf_core/file_reader.cpp b/libc/src/stdio/scanf_core/file_reader.cpp new file mode 100644 index 0000000000000..f39c3b9ab8412 --- /dev/null +++ b/libc/src/stdio/scanf_core/file_reader.cpp @@ -0,0 +1,26 @@ +//===-- FILE Reader implementation for scanf --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/file_reader.h" +#include "src/__support/File/file.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +char FileReader::get_char() { + char tiny_buff = 0; + if (file->read_unlocked(&tiny_buff, 1) != 1) + return 0; + return tiny_buff; +} + +void FileReader::unget_char(char c) { file->ungetc_unlocked(c); } + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/file_reader.h b/libc/src/stdio/scanf_core/file_reader.h new file mode 100644 index 0000000000000..5e97eb604e66b --- /dev/null +++ b/libc/src/stdio/scanf_core/file_reader.h @@ -0,0 +1,38 @@ +//===-- FILE Reader definition for scanf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H + +#include "src/__support/File/file.h" + +#include +#include + +namespace __llvm_libc { +namespace scanf_core { + +class FileReader { + __llvm_libc::File *file; + +public: + FileReader(::FILE *init_file) { + file = reinterpret_cast<__llvm_libc::File *>(init_file); + file->lock(); + } + + ~FileReader() { file->unlock(); } + + char get_char(); + void unget_char(char c); +}; + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp new file mode 100644 index 0000000000000..23dcbd405505d --- /dev/null +++ b/libc/src/stdio/scanf_core/reader.cpp @@ -0,0 +1,35 @@ +//===-- Reader definition for scanf -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/reader.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +char Reader::getc() { + if (reader_type == ReaderType::String) { + return string_reader->get_char(); + } else { + return file_reader->get_char(); + } +} + +void Reader::ungetc(char c) { + if (reader_type == ReaderType::String) { + // The string reader ignores the char c passed to unget since it doesn't + // need to place anything back into a buffer, and modifying the source + // string would be dangerous. 
+ return string_reader->unget_char(); + } else { + return file_reader->unget_char(c); + } +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h new file mode 100644 index 0000000000000..4d6ed06c00e7c --- /dev/null +++ b/libc/src/stdio/scanf_core/reader.h @@ -0,0 +1,49 @@ +//===-- Reader definition for scanf -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H + +#include "src/stdio/scanf_core/file_reader.h" +#include "src/stdio/scanf_core/string_reader.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +enum class ReaderType { String, File }; + +class Reader final { + union { + StringReader *string_reader; + FileReader *file_reader; + }; + + const ReaderType reader_type; + +public: + Reader(StringReader *init_string_reader) + : string_reader(init_string_reader), reader_type(ReaderType::String) {} + + Reader(FileReader *init_file_reader) + : file_reader(init_file_reader), reader_type(ReaderType::File) {} + + // This returns the next character from the input and advances it by one + // character. When it hits the end of the string or file it returns '\0' to + // signal to stop parsing. + char getc(); + + // This moves the input back by one character, placing c into the buffer if + // this is a file reader, else c is ignored. 
+ void ungetc(char c); +}; + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H diff --git a/libc/src/stdio/scanf_core/string_reader.cpp b/libc/src/stdio/scanf_core/string_reader.cpp new file mode 100644 index 0000000000000..1d728d2b9eb35 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_reader.cpp @@ -0,0 +1,24 @@ +//===-- String Reader implementation for scanf ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/string_reader.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +char StringReader::get_char() { + char cur_char = string[cur_index]; + ++cur_index; + return cur_char; +} + +void StringReader::unget_char() { --cur_index; } + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/string_reader.h b/libc/src/stdio/scanf_core/string_reader.h new file mode 100644 index 0000000000000..35550b16c3214 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_reader.h @@ -0,0 +1,33 @@ +//===-- String Reader definition for scanf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H + +#include + +namespace __llvm_libc { +namespace scanf_core { + +class StringReader { + const char *string; + size_t cur_index = 0; + +public: + StringReader(const char *init_string) { string = init_string; } + + ~StringReader() {} + + char get_char(); + void unget_char(); +}; + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H diff --git a/libc/test/src/stdio/scanf_core/CMakeLists.txt b/libc/test/src/stdio/scanf_core/CMakeLists.txt index 3235a0e53e010..fa4878ae5b15f 100644 --- a/libc/test/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/test/src/stdio/scanf_core/CMakeLists.txt @@ -12,3 +12,21 @@ add_libc_unittest( libc.src.__support.CPP.string_view libc.src.__support.arg_list ) + +if(NOT (TARGET libc.src.__support.File.file)) + # Not all platforms have a file implementation. If file is unavailable, + # then we must skip all the parts that need file. + return() +endif() + +add_libc_unittest( + string_reader_test + SUITE + libc_stdio_unittests + SRCS + string_reader_test.cpp + DEPENDS + libc.src.stdio.scanf_core.reader + libc.src.stdio.scanf_core.string_reader + libc.src.__support.CPP.string_view +) diff --git a/libc/test/src/stdio/scanf_core/string_reader_test.cpp b/libc/test/src/stdio/scanf_core/string_reader_test.cpp new file mode 100644 index 0000000000000..43e65cc1bab6e --- /dev/null +++ b/libc/test/src/stdio/scanf_core/string_reader_test.cpp @@ -0,0 +1,66 @@ +//===-- Unittests for the scanf String Reader -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/string_view.h" +#include "src/stdio/scanf_core/reader.h" +#include "src/stdio/scanf_core/string_reader.h" + +#include "utils/UnitTest/Test.h" + +TEST(LlvmLibcScanfStringReaderTest, Constructor) { + char str[10]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); +} + +TEST(LlvmLibcScanfStringReaderTest, SimpleRead) { + const char str[] = "abc"; // Array so sizeof(str) covers the data. + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + for (size_t i = 0; i < sizeof(str); ++i) { + ASSERT_EQ(str[i], reader.getc()); + } +} + +TEST(LlvmLibcScanfStringReaderTest, ReadAndReverse) { + const char str[] = "abcDEF123"; // Array so sizeof(str) covers the data. + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + for (size_t i = 0; i < 5; ++i) { + ASSERT_EQ(str[i], reader.getc()); + } + + // Move back by 3, cursor should now be on 2 + reader.ungetc(str[4]); + reader.ungetc(str[3]); + reader.ungetc(str[2]); + + for (size_t i = 2; i < 7; ++i) { + ASSERT_EQ(str[i], reader.getc()); + } + + // Move back by 2, cursor should now be on 5 + reader.ungetc(str[6]); + reader.ungetc(str[5]); + + for (size_t i = 5; i < 10; ++i) { + ASSERT_EQ(str[i], reader.getc()); + } + + // Move back by 10, which should be back to the start. + for (size_t i = 0; i < 10; ++i) { + reader.ungetc(str[9 - i]); + } + + // Check the whole string, including the null terminator. + for (size_t i = 0; i < sizeof(str); ++i) { + ASSERT_EQ(str[i], reader.getc()); + } +} From 1c40d5ec7dc5b782262c79b0f7a57bfea6fbe75c Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 3 Nov 2022 14:22:34 -0700 Subject: [PATCH 470/516] [libc] add scanf string converters This patch adds the basic conversion facilities to scanf as well as unit tests for them.
It also adds scanf_main which will be used for the eventual scanf entrypoints. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D137376 --- libc/src/stdio/scanf_core/CMakeLists.txt | 31 ++ libc/src/stdio/scanf_core/converter.cpp | 98 ++++++ libc/src/stdio/scanf_core/converter.h | 33 ++ libc/src/stdio/scanf_core/core_structs.h | 2 +- libc/src/stdio/scanf_core/reader.cpp | 2 + libc/src/stdio/scanf_core/reader.h | 4 + libc/src/stdio/scanf_core/scanf_main.cpp | 47 +++ libc/src/stdio/scanf_core/scanf_main.h | 26 ++ .../src/stdio/scanf_core/string_converter.cpp | 76 +++++ libc/src/stdio/scanf_core/string_converter.h | 25 ++ libc/test/src/stdio/scanf_core/CMakeLists.txt | 13 + .../src/stdio/scanf_core/converter_test.cpp | 295 ++++++++++++++++++ 12 files changed, 651 insertions(+), 1 deletion(-) create mode 100644 libc/src/stdio/scanf_core/converter.cpp create mode 100644 libc/src/stdio/scanf_core/converter.h create mode 100644 libc/src/stdio/scanf_core/scanf_main.cpp create mode 100644 libc/src/stdio/scanf_core/scanf_main.h create mode 100644 libc/src/stdio/scanf_core/string_converter.cpp create mode 100644 libc/src/stdio/scanf_core/string_converter.h create mode 100644 libc/test/src/stdio/scanf_core/converter_test.cpp diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt index 91cf5e2ada907..940e9f0d083f3 100644 --- a/libc/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/src/stdio/scanf_core/CMakeLists.txt @@ -30,6 +30,20 @@ if(NOT (TARGET libc.src.__support.File.file)) return() endif() +add_object_library( + scanf_main + SRCS + scanf_main.cpp + HDRS + scanf_main.h + DEPENDS + .parser + .reader + .converter + .core_structs + libc.src.__support.arg_list +) + add_object_library( string_reader SRCS @@ -58,3 +72,20 @@ add_object_library( .string_reader .file_reader ) + +add_object_library( + converter + SRCS + converter.cpp + string_converter.cpp + HDRS + converter.h + string_converter.h + DEPENDS + .reader + 
.core_structs + libc.src.__support.ctype_utils + libc.src.__support.CPP.bitset + libc.src.__support.CPP.string_view + libc.src.__support.CPP.limits +) diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp new file mode 100644 index 0000000000000..3cfa8758349ec --- /dev/null +++ b/libc/src/stdio/scanf_core/converter.cpp @@ -0,0 +1,98 @@ +//===-- Format specifier converter implementation for scanf -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/converter.h" + +#include "src/__support/ctype_utils.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include "src/stdio/scanf_core/string_converter.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int convert(Reader *reader, const FormatSection &to_conv) { + int ret_val = 0; + switch (to_conv.conv_name) { + case '%': + return raw_match(reader, "%"); + case 's': + ret_val = raw_match(reader, " "); + if (ret_val != READ_OK) + return ret_val; + return convert_string(reader, to_conv); + case 'c': + case '[': + return convert_string(reader, to_conv); + // case 'd': + // case 'i': + // case 'u': + // case 'o': + // case 'x': + // case 'X': + // ret_val = raw_match(reader, " "); + // if (ret_val != READ_OK) + // return ret_val; + // return convert_int(reader, to_conv); + // #ifndef LLVM_LIBC_SCANF_DISABLE_FLOAT + // case 'f': + // case 'F': + // case 'e': + // case 'E': + // case 'a': + // case 'A': + // case 'g': + // case 'G': + // ret_val = raw_match(reader, " "); + // if (ret_val != READ_OK) + // return ret_val; + // return convert_float(reader, to_conv); + // #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT + // #ifndef
LLVM_LIBC_SCANF_DISABLE_WRITE_INT + // case 'n': + // return convert_write_int(reader, to_conv); + // #endif // LLVM_LIBC_SCANF_DISABLE_WRITE_INT + // case 'p': + // ret_val = raw_match(reader, " "); + // if (ret_val != READ_OK) + // return ret_val; + // return convert_pointer(reader, to_conv); + default: + return raw_match(reader, to_conv.raw_string); + } + return -1; +} + +// raw_string is assumed to have a positive size. +int raw_match(Reader *reader, cpp::string_view raw_string) { + char cur_char = reader->getc(); + int ret_val = READ_OK; + for (size_t i = 0; i < raw_string.size(); ++i) { + // Any space character matches any number of space characters. + if (internal::isspace(raw_string[i])) { + while (internal::isspace(cur_char)) { + cur_char = reader->getc(); + } + } else { + if (raw_string[i] == cur_char) { + cur_char = reader->getc(); + } else { + ret_val = MATCHING_FAILURE; + break; + } + } + } + reader->ungetc(cur_char); + return ret_val; +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/converter.h b/libc/src/stdio/scanf_core/converter.h new file mode 100644 index 0000000000000..cd91ff66a3aed --- /dev/null +++ b/libc/src/stdio/scanf_core/converter.h @@ -0,0 +1,33 @@ +//===-- Format specifier converter for scanf -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H + +#include "src/__support/CPP/string_view.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +// convert will call a conversion function that reads characters from the +// reader and converts them as described by the FormatSection, returning +// READ_OK on success or an error code such as MATCHING_FAILURE otherwise. +int convert(Reader *reader, const FormatSection &to_conv); + +// raw_match takes a raw string and matches it to the characters obtained from +// the reader. +int raw_match(Reader *reader, cpp::string_view raw_string); + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H diff --git a/libc/src/stdio/scanf_core/core_structs.h b/libc/src/stdio/scanf_core/core_structs.h index 213a5e1a2b59a..7f331db362023 100644 --- a/libc/src/stdio/scanf_core/core_structs.h +++ b/libc/src/stdio/scanf_core/core_structs.h @@ -78,7 +78,7 @@ struct FormatSection { enum ErrorCodes : int { // This is the value to be returned by conversions when no error has occurred. - WRITE_OK = 0, + READ_OK = 0, // These are the scanf return values for when an error has occurred. They are // all negative, and should be distinct.
FILE_READ_ERROR = -1, diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp index 23dcbd405505d..0d8d5a30f7c4d 100644 --- a/libc/src/stdio/scanf_core/reader.cpp +++ b/libc/src/stdio/scanf_core/reader.cpp @@ -13,6 +13,7 @@ namespace __llvm_libc { namespace scanf_core { char Reader::getc() { + ++cur_chars_read; if (reader_type == ReaderType::String) { return string_reader->get_char(); } else { @@ -21,6 +22,7 @@ char Reader::getc() { } void Reader::ungetc(char c) { + --cur_chars_read; if (reader_type == ReaderType::String) { // The string reader ignores the char c passed to unget since it doesn't // need to place anything back into a buffer, and modifying the source diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h index 4d6ed06c00e7c..4ca25cc0d0cab 100644 --- a/libc/src/stdio/scanf_core/reader.h +++ b/libc/src/stdio/scanf_core/reader.h @@ -26,6 +26,8 @@ class Reader final { const ReaderType reader_type; + size_t cur_chars_read = 0; + public: Reader(StringReader *init_string_reader) : string_reader(init_string_reader), reader_type(ReaderType::String) {} @@ -41,6 +43,8 @@ class Reader final { // This moves the input back by one character, placing c into the buffer if // this is a file reader, else c is ignored. void ungetc(char c); + + size_t chars_read() { return cur_chars_read; } }; } // namespace scanf_core diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp new file mode 100644 index 0000000000000..fcf7af2083f22 --- /dev/null +++ b/libc/src/stdio/scanf_core/scanf_main.cpp @@ -0,0 +1,47 @@ +//===-- Starting point for scanf --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/scanf_main.h" + +#include "src/__support/arg_list.h" +#include "src/stdio/scanf_core/converter.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/parser.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int scanf_main(Reader *reader, const char *__restrict str, + internal::ArgList &args) { + Parser parser(str, args); + int ret_val = READ_OK; + int conversions = 0; + for (FormatSection cur_section = parser.get_next_section(); + !cur_section.raw_string.empty() && ret_val == READ_OK; + cur_section = parser.get_next_section()) { + if (cur_section.has_conv) { + ret_val = convert(reader, cur_section); + conversions += ret_val == READ_OK ? 1 : 0; + } else { + ret_val = raw_match(reader, cur_section.raw_string); + } + } + + if (conversions == 0 && ret_val != READ_OK) { + // This is intended to be converted to EOF in the client call to avoid + // including stdio.h in this internal file. + return -1; + } + return conversions; +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/scanf_main.h b/libc/src/stdio/scanf_core/scanf_main.h new file mode 100644 index 0000000000000..d1db46b7c77dc --- /dev/null +++ b/libc/src/stdio/scanf_core/scanf_main.h @@ -0,0 +1,26 @@ +//===-- Starting point for scanf --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H + +#include "src/__support/arg_list.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int scanf_main(Reader *reader, const char *__restrict str, + internal::ArgList &args); + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H diff --git a/libc/src/stdio/scanf_core/string_converter.cpp b/libc/src/stdio/scanf_core/string_converter.cpp new file mode 100644 index 0000000000000..bdbb5c87f75e5 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_converter.cpp @@ -0,0 +1,76 @@ +//===-- String type specifier converters for scanf --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/string_converter.h" + +#include "src/__support/CPP/limits.h" +#include "src/__support/ctype_utils.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int convert_string(Reader *reader, const FormatSection &to_conv) { + // %s "Matches a sequence of non-white-space characters" + + // %c "Matches a sequence of characters of exactly the number specified by the + // field width (1 if no field width is present in the directive)" + + // %[ "Matches a nonempty sequence of characters from a set of expected + // characters (the scanset)." 
+ size_t max_width = 0; + if (to_conv.max_width > 0) { + max_width = to_conv.max_width; + } else { + if (to_conv.conv_name == 'c') { + max_width = 1; + } else { + max_width = cpp::numeric_limits::max(); + } + } + + char *output = reinterpret_cast(to_conv.output_ptr); + + char cur_char = reader->getc(); + size_t i = 0; + for (; i < max_width && cur_char != '\0'; ++i) { + // If this is %s and we've hit a space, or if this is %[] and we've found + // something not in the scanset. + if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) || + (to_conv.conv_name == '[' && !to_conv.scan_set.test(cur_char))) { + break; + } + // if the NO_WRITE flag is not set, write to the output. + if ((to_conv.flags & NO_WRITE) == 0) + output[i] = cur_char; + cur_char = reader->getc(); + } + + // We always read one more character than will be used, so we have to put the + // last one back. + reader->ungetc(cur_char); + + // If this is %s or %[] + if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) { + // Always null terminate the string. This may cause a write to the + // (max_width + 1) byte, which is correct. The max width describes the max + // number of characters read from the input string, and doesn't necessarily + // correspond to the output. + output[i] = '\0'; + } + + if (i == 0) + return MATCHING_FAILURE; + return READ_OK; +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/string_converter.h b/libc/src/stdio/scanf_core/string_converter.h new file mode 100644 index 0000000000000..4113f5cb9a369 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_converter.h @@ -0,0 +1,25 @@ +//===-- String type specifier converters for scanf --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H + +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include <stddef.h> + +namespace __llvm_libc { +namespace scanf_core { + +int convert_string(Reader *reader, const FormatSection &to_conv); + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H diff --git a/libc/test/src/stdio/scanf_core/CMakeLists.txt b/libc/test/src/stdio/scanf_core/CMakeLists.txt index fa4878ae5b15f..db20335a5c943 100644 --- a/libc/test/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/test/src/stdio/scanf_core/CMakeLists.txt @@ -30,3 +30,16 @@ add_libc_unittest( libc.src.stdio.scanf_core.string_reader libc.src.__support.CPP.string_view ) + +add_libc_unittest( + converter_test + SUITE + libc_stdio_unittests + SRCS + converter_test.cpp + DEPENDS + libc.src.stdio.scanf_core.reader + libc.src.stdio.scanf_core.string_reader + libc.src.stdio.scanf_core.converter + libc.src.__support.CPP.string_view +) diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp new file mode 100644 index 0000000000000..d90af34ff1979 --- /dev/null +++ b/libc/test/src/stdio/scanf_core/converter_test.cpp @@ -0,0 +1,295 @@ +//===-- Unittests for the basic scanf converters --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/string_view.h" +#include "src/stdio/scanf_core/converter.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" +#include "src/stdio/scanf_core/string_reader.h" + +#include "utils/UnitTest/Test.h" + +TEST(LlvmLibcScanfConverterTest, RawMatchBasic) { + const char *str = "abcdef"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + // Reading "abc" should succeed. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "abc"), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + + // Reading nothing should succeed and not advance. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, ""), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + + // Reading a space where there is none should succeed and not advance. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " "), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + + // Reading "d" should succeed and advance by 1. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "d"), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(4)); + + // Reading "z" should fail and not advance. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "z"), + static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(4)); + + // Reading "efgh" should fail but advance to the end.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "efgh"), + static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(6)); +} + +TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) { + const char *str = " a \t\n b   cd"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + // Reading "a" should fail and not advance. + // Since there's nothing in the format string (the second argument to + // raw_match) to match the space in the buffer it isn't consumed. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "a"), + static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(0)); + + // Reading " \t\n " should succeed and advance past the space. + // Any number of space characters in the format string match 0 or more space + // characters in the buffer. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " \t\n "), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(1)); + + // Reading "ab" should fail and only advance past the a + // The a characters match, but the format string doesn't have anything to + // consume the spaces in the buffer, so it fails. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "ab"), + static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(2)); + + // Reading " b" should succeed and advance past the b + // Any number of space characters in the format string matches 0 or more space + // characters in the buffer.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " b"), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(7)); + + // Reading "\t" should succeed and advance past the spaces to the c + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "\t"), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(10)); + + // Reading "c d" should succeed and advance past the d. + // Here the space character in the format string is matching 0 space + // characters in the buffer. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "c d"), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(12)); +} + +TEST(LlvmLibcScanfConverterTest, StringConvSimple) { + const char *str = "abcDEF123 654LKJihg"; + char result[20]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 's'; + conv.output_ptr = result; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(9)); + ASSERT_STREQ(result, "abcDEF123"); + + //%s skips all spaces before beginning to read.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); + ASSERT_STREQ(result, "654LKJihg"); +} + +TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) { + const char *str = "abcDEF123 654LKJihg"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 's'; + conv.flags = __llvm_libc::scanf_core::NO_WRITE; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(9)); + + //%s skips all spaces before beginning to read. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); +} + +TEST(LlvmLibcScanfConverterTest, StringConvWidth) { + const char *str = "abcDEF123 654LKJihg"; + char result[6]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 's'; + conv.max_width = 5; // this means the result takes up 6 characters (with \0). + conv.output_ptr = result; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(5)); + ASSERT_STREQ(result, "abcDE"); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(9)); + ASSERT_STREQ(result, "F123"); + + //%s skips all spaces before beginning to read.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(15)); + ASSERT_STREQ(result, "654LK"); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); + ASSERT_STREQ(result, "Jihg"); +} + +TEST(LlvmLibcScanfConverterTest, CharsConv) { + const char *str = "abcDEF123 654LKJihg MNOpqr&*("; + char result[20]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 'c'; + conv.output_ptr = result; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(1)); + ASSERT_EQ(result[0], 'a'); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(2)); + ASSERT_EQ(result[0], 'b'); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + ASSERT_EQ(result[0], 'c'); + + // Switch from character by character to 8 at a time. + conv.max_width = 8; + __llvm_libc::cpp::string_view result_view(result, 8); + + //%c doesn't stop on spaces. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(11)); + ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("DEF123 6", 8)); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); + ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("54LKJihg", 8)); + + //%c also doesn't skip spaces at the start.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(27)); + ASSERT_EQ(result_view, __llvm_libc::cpp::string_view(" MNOpqr&", 8)); + + //%c will stop on a null byte though. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(29)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 2), + __llvm_libc::cpp::string_view("*(", 2)); +} + +TEST(LlvmLibcScanfConverterTest, ScansetConv) { + const char *str = "abcDEF[123] 654LKJihg"; + char result[20]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = '['; + conv.output_ptr = result; + + __llvm_libc::cpp::bitset<256> bitset1; + bitset1.set_range('a', 'c'); + bitset1.set_range('D', 'F'); + bitset1.set_range('1', '6'); + bitset1.set('['); + bitset1.set(']'); + + conv.scan_set = bitset1; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(11)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 11), + __llvm_libc::cpp::string_view("abcDEF[123]", 11)); + + // The scanset conversion doesn't consume leading spaces. If it did it would + // return "654" here. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(11)); + + // This set is everything except for a-g.
+ __llvm_libc::cpp::bitset<256> bitset2; + bitset2.set_range('a', 'g'); + bitset2.flip(); + conv.scan_set = bitset2; + + conv.max_width = 5; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(16)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 5), + __llvm_libc::cpp::string_view(" 654L", 5)); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(20)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 4), + __llvm_libc::cpp::string_view("KJih", 4)); + + // This set is g and '\0'. + __llvm_libc::cpp::bitset<256> bitset3; + bitset3.set('g'); + bitset3.set('\0'); + conv.scan_set = bitset3; + + // Even though '\0' is in the scanset, it should still stop on it. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast<int>(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(21)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 1), + __llvm_libc::cpp::string_view("g", 1)); +} From 86674f66cc78a1a121d43fe51f076cbfa8710b1a Mon Sep 17 00:00:00 2001 From: Grace Jennings Date: Mon, 7 Nov 2022 13:16:54 -0800 Subject: [PATCH 471/516] [HLSL] Added HLSL this as a reference This change makes `this` a reference instead of a pointer in HLSL. HLSL does not have the `->` operator, and accesses through `this` are with the `.` syntax. Tests were added and altered to make sure the AST accurately reflects the types.
Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D135721 --- clang/lib/AST/ExprClassification.cpp | 5 +- clang/lib/CodeGen/CGExpr.cpp | 2 + clang/lib/Sema/HLSLExternalSemaSource.cpp | 16 +++-- clang/lib/Sema/SemaDeclCXX.cpp | 14 ++++- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/lib/Sema/SemaExprCXX.cpp | 7 +++ clang/lib/Sema/SemaExprMember.cpp | 8 +++ clang/test/AST/HLSL/RWBuffer-AST.hlsl | 8 +-- .../AST/HLSL/this-reference-template.hlsl | 46 ++++++++++++++ clang/test/AST/HLSL/this-reference.hlsl | 62 +++++++++++++++++++ .../CodeGenHLSL/this-assignment-overload.hlsl | 55 ++++++++++++++++ clang/test/CodeGenHLSL/this-assignment.hlsl | 45 ++++++++++++++ clang/test/CodeGenHLSL/this-reference.hlsl | 28 +++++++++ 13 files changed, 283 insertions(+), 15 deletions(-) create mode 100644 clang/test/AST/HLSL/this-reference-template.hlsl create mode 100644 clang/test/AST/HLSL/this-reference.hlsl create mode 100644 clang/test/CodeGenHLSL/this-assignment-overload.hlsl create mode 100644 clang/test/CodeGenHLSL/this-assignment.hlsl create mode 100644 clang/test/CodeGenHLSL/this-reference.hlsl diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index 6c122cac2c60b..88081d9ed73a5 100644 --- a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -160,7 +160,6 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::CXXPseudoDestructorExprClass: case Expr::UnaryExprOrTypeTraitExprClass: case Expr::CXXNewExprClass: - case Expr::CXXThisExprClass: case Expr::CXXNullPtrLiteralExprClass: case Expr::ImaginaryLiteralClass: case Expr::GNUNullExprClass: @@ -205,6 +204,10 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::RequiresExprClass: return Cl::CL_PRValue; + // Make HLSL this reference-like + case Expr::CXXThisExprClass: + return Lang.HLSL ? 
Cl::CL_LValue : Cl::CL_PRValue; + case Expr::ConstantExprClass: return ClassifyInternal(Ctx, cast<ConstantExpr>(E)->getSubExpr()); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 493b340ecdc52..40d9b8f37b4a6 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1383,6 +1383,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { return EmitOMPArraySectionExpr(cast<OMPArraySectionExpr>(E)); case Expr::ExtVectorElementExprClass: return EmitExtVectorElementExpr(cast<ExtVectorElementExpr>(E)); + case Expr::CXXThisExprClass: + return MakeAddrLValue(LoadCXXThisAddress(), E->getType()); case Expr::MemberExprClass: return EmitMemberExpr(cast<MemberExpr>(E)); case Expr::CompoundLiteralExprClass: diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index f1b90fa971866..7459d9ce82455 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -175,9 +175,11 @@ struct BuiltinTypeDeclBuilder { Expr *Call = CallExpr::Create(AST, Fn, {RCExpr}, AST.VoidPtrTy, VK_PRValue, SourceLocation(), FPOptionsOverride()); - CXXThisExpr *This = new (AST) - CXXThisExpr(SourceLocation(), Constructor->getThisType(), true); - Expr *Handle = MemberExpr::CreateImplicit(AST, This, true, Fields["h"], + CXXThisExpr *This = new (AST) CXXThisExpr( + SourceLocation(), + Constructor->getThisType().getTypePtr()->getPointeeType(), true); + This->setValueKind(ExprValueKind::VK_LValue); + Expr *Handle = MemberExpr::CreateImplicit(AST, This, false, Fields["h"], Fields["h"]->getType(), VK_LValue, OK_Ordinary); @@ -260,10 +262,12 @@ struct BuiltinTypeDeclBuilder { auto FnProtoLoc = TSInfo->getTypeLoc().getAs<FunctionProtoTypeLoc>(); FnProtoLoc.setParam(0, IdxParam); - auto *This = new (AST) - CXXThisExpr(SourceLocation(), MethodDecl->getThisType(), true); + auto *This = new (AST) CXXThisExpr( + SourceLocation(), + MethodDecl->getThisType().getTypePtr()->getPointeeType(), true); + This->setValueKind(ExprValueKind::VK_LValue); auto *HandleAccess =
MemberExpr::CreateImplicit( - AST, This, true, Handle, Handle->getType(), VK_LValue, OK_Ordinary); + AST, This, false, Handle, Handle->getType(), VK_LValue, OK_Ordinary); auto *IndexExpr = DeclRefExpr::Create( AST, NestedNameSpecifierLoc(), SourceLocation(), IdxParam, false, diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index ea7997b347959..73603b51de8a8 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -14681,7 +14681,8 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, MemberBuilder From(OtherRef, OtherRefType, /*IsArrow=*/false, MemberLookup); - MemberBuilder To(This, getCurrentThisType(), /*IsArrow=*/true, MemberLookup); + MemberBuilder To(This, getCurrentThisType(), /*IsArrow=*/!LangOpts.HLSL, + MemberLookup); // Build the copy of this field. StmtResult Copy = buildSingleCopyAssign(*this, Loc, FieldType, @@ -14699,9 +14700,16 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, if (!Invalid) { // Add a "return *this;" - ExprResult ThisObj = CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc)); + Expr *ThisExpr = nullptr; + if (!LangOpts.HLSL) { + ExprResult ThisObj = + CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc)); + ThisExpr = ThisObj.get(); + } else { + ThisExpr = This.build(*this, Loc); + } - StmtResult Return = BuildReturnStmt(Loc, ThisObj.get()); + StmtResult Return = BuildReturnStmt(Loc, ThisExpr); if (Return.isInvalid()) Invalid = true; else diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2493b4a76d5e1..c40bcb083907b 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -15587,7 +15587,7 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, } } - if (getLangOpts().HLSL) { + if (getLangOpts().HLSL && OpLoc.isValid()) { if (Opc == UO_AddrOf) return ExprError(Diag(OpLoc, diag::err_hlsl_operator_unsupported) << 0); if (Opc == UO_Deref) diff --git 
a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 41c4348de0791..c093eab991f20 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1390,6 +1390,13 @@ ExprResult Sema::ActOnCXXThis(SourceLocation Loc) { Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, bool IsImplicit) { + if (getLangOpts().HLSL && Type.getTypePtr()->isPointerType()) { + auto *This = new (Context) + CXXThisExpr(Loc, Type.getTypePtr()->getPointeeType(), IsImplicit); + This->setValueKind(ExprValueKind::VK_LValue); + MarkThisReferenced(This); + return This; + } auto *This = new (Context) CXXThisExpr(Loc, Type, IsImplicit); MarkThisReferenced(This); return This; diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index fc68b526e30ca..8eeed1a29dfc7 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -1903,6 +1903,14 @@ Sema::BuildImplicitMemberExpr(const CXXScopeSpec &SS, if (SS.getRange().isValid()) Loc = SS.getRange().getBegin(); baseExpr = BuildCXXThisExpr(loc, ThisTy, /*IsImplicit=*/true); + if (getLangOpts().HLSL && ThisTy.getTypePtr()->isPointerType()) { + ThisTy = ThisTy.getTypePtr()->getPointeeType(); + return BuildMemberReferenceExpr(baseExpr, ThisTy, + /*OpLoc*/ SourceLocation(), + /*IsArrow*/ false, SS, TemplateKWLoc, + /*FirstQualifierInScope*/ nullptr, R, + TemplateArgs, S); + } } return BuildMemberReferenceExpr(baseExpr, ThisTy, diff --git a/clang/test/AST/HLSL/RWBuffer-AST.hlsl b/clang/test/AST/HLSL/RWBuffer-AST.hlsl index 0929462e51831..9dd9244b73eed 100644 --- a/clang/test/AST/HLSL/RWBuffer-AST.hlsl +++ b/clang/test/AST/HLSL/RWBuffer-AST.hlsl @@ -46,8 +46,8 @@ RWBuffer Buffer; // CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type' lvalue -// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue ->h 0x{{[0-9A-Fa-f]+}} -// 
CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'const RWBuffer *' implicit this +// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'const RWBuffer' lvalue implicit this // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline @@ -56,8 +56,8 @@ RWBuffer Buffer; // CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type' lvalue -// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue ->h 0x{{[0-9A-Fa-f]+}} -// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'RWBuffer *' implicit this +// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'RWBuffer' lvalue implicit this // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline diff --git a/clang/test/AST/HLSL/this-reference-template.hlsl b/clang/test/AST/HLSL/this-reference-template.hlsl new file mode 100644 index 0000000000000..c27d69d36ca3e --- /dev/null +++ b/clang/test/AST/HLSL/this-reference-template.hlsl @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +template +struct Pair { + K First; + V Second; + + K getFirst() { + return this.First; + } + + V getSecond() { + return Second; + } +}; + +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2.0}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// CHECK: -CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:8:5 getFirst 'K ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 
0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXDependentScopeMemberExpr 0x{{[0-9A-Fa-f]+}} '' lvalue .First +// CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue this +// CHECK-NEXT:-CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:12:5 getSecond 'V ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'V' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue implicit this + +// CHECK: -CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:8:5 used getFirst 'int ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int':'int' +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int':'int' lvalue .First 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue this +// CHECK-NEXT:-CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:12:5 used getSecond 'float ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'float':'float' +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'float':'float' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue implicit this diff --git a/clang/test/AST/HLSL/this-reference.hlsl b/clang/test/AST/HLSL/this-reference.hlsl new file mode 100644 index 0000000000000..67d8e7b7b9119 --- /dev/null +++ b/clang/test/AST/HLSL/this-reference.hlsl @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +class Pair { + int First; + int Second; + + int getFirst() { + return this.First; + } + + int getSecond() { + return Second; + } +}; + +class PairInfo : Pair { + int Sum; + + int getSum() { + return this.First + Second; + } +}; + 
+[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); + + PairInfo ValsInfo; + ValsInfo.First = Vals.First; + ValsInfo.Second = Vals.Second; + ValsInfo.Sum = ValsInfo.getSum(); + +} + +// CHECK: -CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:7:7 used getFirst 'int ()' implicit-inline +// CHECK-NEXT:`-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .First 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue this +// CHECK-NEXT:-CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:11:7 used getSecond 'int ()' implicit-inline +// CHECK-NEXT:`-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue implicit this + + +// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:19:7 used getSum 'int ()' implicit-inline +// CHECK-NEXT:`-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-BinaryOperator 0x{{[0-9A-Fa-f]+}} 'int' '+' +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .First 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'PairInfo' lvalue this +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'PairInfo' lvalue implicit this diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl 
b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl new file mode 100644 index 0000000000000..92504dfbd6261 --- /dev/null +++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes -o - -std=hlsl202x %s | FileCheck %s + +struct Pair { + int First; + int Second; + int getFirst() { + Pair Another = {5, 10}; + this = Another; + return this.First; + } + int getSecond() { + this = Pair(); + return Second; + } + void operator=(Pair P) { + First = P.First; + Second = 2; + } +}; +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators. +// CHECK: define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #2 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%Another = alloca %struct.Pair, align 4 +// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:%First = getelementptr inbounds %struct.Pair, ptr %Another, i32 0, i32 0 +// CHECK-NEXT:store i32 5, ptr %First, align 4 +// CHECK-NEXT:%Second = getelementptr inbounds %struct.Pair, ptr %Another, i32 0, i32 1 +// CHECK-NEXT:store i32 10, ptr %Second, align 4 +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Another, i32 8, i1 false) +// CHECK-NEXT:call void @"??4Pair@@QAAXU0@@Z"(ptr noundef nonnull align 4 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 4 %agg.tmp) +// CHECK-NEXT:%First2 = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 0 +// CHECK-NEXT:%0 = load i32, ptr %First2, align 4 +// CHECK-NEXT:ret i32 %0 + +// CHECK: define linkonce_odr 
noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #2 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 4 %agg.tmp, i8 0, i32 8, i1 false) +// CHECK-NEXT:call void @"??4Pair@@QAAXU0@@Z"(ptr noundef nonnull align 4 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 4 %agg.tmp) +// CHECK-NEXT:%Second = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 1 +// CHECK-NEXT:%0 = load i32, ptr %Second, align 4 +// CHECK-NEXT:ret i32 %0 diff --git a/clang/test/CodeGenHLSL/this-assignment.hlsl b/clang/test/CodeGenHLSL/this-assignment.hlsl new file mode 100644 index 0000000000000..bb67fb6e103c5 --- /dev/null +++ b/clang/test/CodeGenHLSL/this-assignment.hlsl @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +struct Pair { + int First; + int Second; + + int getFirst() { + Pair Another = {5, 10}; + this = Another; + return this.First; + } + + int getSecond() { + this = Pair(); + return Second; + } +}; + +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2.0}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// This tests reference like implicit this in HLSL +// CHECK: define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #3 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%Another = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %Another, ptr align 4 
@"__const.?getFirst@Pair@@QAAHXZ.Another", i32 8, i1 false) +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %this1, ptr align 4 %Another, i32 8, i1 false) +// CHECK-NEXT:%First = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 0 + +// CHECK: define linkonce_odr noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #3 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%ref.tmp = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 4 %ref.tmp, i8 0, i32 8, i1 false) +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %this1, ptr align 4 %ref.tmp, i32 8, i1 false) +// CHECK-NEXT:%Second = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 1 diff --git a/clang/test/CodeGenHLSL/this-reference.hlsl b/clang/test/CodeGenHLSL/this-reference.hlsl new file mode 100644 index 0000000000000..22bab1d90c70a --- /dev/null +++ b/clang/test/CodeGenHLSL/this-reference.hlsl @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +struct Pair { + int First; + float Second; + + int getFirst() { + return this.First; + } + + float getSecond() { + return Second; + } +}; + +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2.0}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// This tests reference like `this` in HLSL + // CHECK: %call = call noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %Vals) + // CHECK-NEXT: %First = getelementptr inbounds %struct.Pair, ptr %Vals, i32 0, i32 0 + // CHECK-NEXT: store i32 %call, ptr %First, align 4 + // CHECK-NEXT: %call1 = call noundef float @"?getSecond@Pair@@QAAMXZ"(ptr noundef nonnull align 4 dereferenceable(8) %Vals) + 
// CHECK-NEXT: %Second = getelementptr inbounds %struct.Pair, ptr %Vals, i32 0, i32 1 From 430ca14af835a4d8ea927ed6550a99242bebf255 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 7 Nov 2022 14:02:32 -0800 Subject: [PATCH 472/516] [libc][obvious] fix tests using wrong size for string In the code const char *str = "abc" if you do sizeof(str) you get the size of the pointer, not the string. This patch fixes that mistake. Differential Revision: https://reviews.llvm.org/D137586 --- libc/test/src/stdio/scanf_core/string_reader_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/test/src/stdio/scanf_core/string_reader_test.cpp b/libc/test/src/stdio/scanf_core/string_reader_test.cpp index 43e65cc1bab6e..4331d488be06a 100644 --- a/libc/test/src/stdio/scanf_core/string_reader_test.cpp +++ b/libc/test/src/stdio/scanf_core/string_reader_test.cpp @@ -23,7 +23,7 @@ TEST(LlvmLibcScanfStringReaderTest, SimpleRead) { __llvm_libc::scanf_core::StringReader str_reader(str); __llvm_libc::scanf_core::Reader reader(&str_reader); - for (size_t i = 0; i < sizeof(str); ++i) { + for (size_t i = 0; i < sizeof("abc"); ++i) { ASSERT_EQ(str[i], reader.getc()); } } @@ -60,7 +60,7 @@ TEST(LlvmLibcScanfStringReaderTest, ReadAndReverse) { } // Check the whole string. - for (size_t i = 0; i < sizeof(str); ++i) { + for (size_t i = 0; i < sizeof("abcDEF123"); ++i) { ASSERT_EQ(str[i], reader.getc()); } } From eedbe44b8755f7d162eee43cb3e8c9da1e61ebad Mon Sep 17 00:00:00 2001 From: Dan Albert Date: Thu, 6 Oct 2022 15:22:55 -0700 Subject: [PATCH 473/516] [LLD] Enable --no-undefined-version by default. Allowing incorrect version scripts is not a helpful default. Flip that to help users find their bugs at build time rather than at run time. 
Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D135402 --- lld/ELF/Driver.cpp | 2 +- lld/ELF/Options.td | 2 +- lld/docs/ReleaseNotes.rst | 3 +++ lld/docs/ld.lld.1 | 4 ++-- lld/test/ELF/verdef-defaultver.s | 6 +++--- lld/test/ELF/verdef-dependency.s | 2 +- lld/test/ELF/verneed.s | 4 ++-- lld/test/ELF/version-script-extern-undefined.s | 2 +- lld/test/ELF/version-script-local-preemptible.s | 2 +- lld/test/ELF/version-script-noundef.s | 3 ++- lld/test/ELF/version-script-reassign.s | 2 +- 11 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 271776ddd32b8..1a2acc51c3c2e 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1244,7 +1244,7 @@ static void readConfigs(opt::InputArgList &args) { config->trace = args.hasArg(OPT_trace); config->undefined = args::getStrings(args, OPT_undefined); config->undefinedVersion = - args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, true); + args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, false); config->unique = args.hasArg(OPT_unique); config->useAndroidRelrTags = args.hasFlag( OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 2e9e057a09615..a8fc63e6a9196 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -443,7 +443,7 @@ defm unresolved_symbols: Eq<"unresolved-symbols", "Determine how to handle unresolved symbols">; defm undefined_version: B<"undefined-version", - "Allow unused version in version script (default)", + "Allow unused version in version script (disabled by default)", "Report version scripts that refer undefined symbols">; defm rsp_quoting: EEq<"rsp-quoting", "Quoting style for response files">, diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 141f34103da15..0157e40fa6612 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -33,6 +33,9 @@ ELF Improvements (`D133548 `_) * 
``--no-warnings``/``-w`` is now available to suppress warnings. (`D136569 `_) +* ``--no-undefined-version`` is now the default; symbols named in version + scripts that have no matching symbol in the output will be reported. Use + ``--undefined-version`` to revert to the old behavior. Breaking changes ---------------- diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 2b530af39ad64..edeb7c4bfe37c 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -352,8 +352,8 @@ Do not set the text data sections to be writable, page align sections. Disable target-specific relaxations. For x86-64 this disables R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX GOT optimization. .It Fl -no-rosegment Do not put read-only non-executable sections in their own segment. -.It Fl -no-undefined-version -Report version scripts that refer undefined symbols. +.It Fl -undefined-version +Do not report version scripts that refer to undefined symbols. .It Fl -no-undefined Report unresolved symbols even if the linker is creating a shared library. 
.It Fl -no-warn-symbol-ordering diff --git a/lld/test/ELF/verdef-defaultver.s b/lld/test/ELF/verdef-defaultver.s index 7becdcf96422b..661f6c4e7da42 100644 --- a/lld/test/ELF/verdef-defaultver.s +++ b/lld/test/ELF/verdef-defaultver.s @@ -4,7 +4,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/verdef-defaultver.s -o %t1 # RUN: echo "V1 { global: a; b; local: *; };" > %t.script # RUN: echo "V2 { global: b; c; } V1;" >> %t.script -# RUN: ld.lld --hash-style=sysv -shared -soname shared %t1 --version-script %t.script -o %t.so +# RUN: ld.lld --hash-style=sysv -shared -soname shared %t1 --version-script %t.script --undefined-version -o %t.so # RUN: llvm-readobj -V --dyn-syms %t.so | FileCheck --check-prefix=DSO %s # DSO: DynamicSymbols [ @@ -195,9 +195,9 @@ # EXE-NEXT: ] # RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o -# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings %t.so b.o -o b.so +# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings --undefined-version %t.so b.o -o b.so # RUN: llvm-readelf --dyn-syms b.so | FileCheck %s --check-prefix=PREEMPT -# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings b.o %t.so -o b.so +# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings --undefined-version b.o %t.so -o b.so # RUN: llvm-readelf --dyn-syms b.so | FileCheck %s --check-prefix=PREEMPT # PREEMPT-DAG: a@@V1 diff --git a/lld/test/ELF/verdef-dependency.s b/lld/test/ELF/verdef-dependency.s index d716436202535..89ebc3043ad44 100644 --- a/lld/test/ELF/verdef-dependency.s +++ b/lld/test/ELF/verdef-dependency.s @@ -3,7 +3,7 @@ # RUN: echo "LIBSAMPLE_1.0 { global: a; local: *; };" > %t.script # RUN: echo "LIBSAMPLE_2.0 { global: b; local: *; } LIBSAMPLE_1.0;" >> %t.script # RUN: echo "LIBSAMPLE_3.0 { global: c; } LIBSAMPLE_2.0;" >> %t.script -# RUN: ld.lld --version-script %t.script -shared -soname shared %t.o -o %t.so +# RUN: ld.lld --version-script %t.script --undefined-version -shared -soname 
shared %t.o -o %t.so # RUN: llvm-readobj -V --dyn-syms %t.so | FileCheck --check-prefix=DSO %s # DSO: VersionDefinitions [ diff --git a/lld/test/ELF/verneed.s b/lld/test/ELF/verneed.s index 6a90cc48e68fb..734387a62785f 100644 --- a/lld/test/ELF/verneed.s +++ b/lld/test/ELF/verneed.s @@ -1,9 +1,9 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %S/Inputs/verneed1.s -o %t1.o # RUN: echo "v1 {}; v2 {}; v3 { global: f1; local: *; };" > %t.script -# RUN: ld.lld -shared %t1.o --version-script %t.script -o %t1.so -soname verneed1.so.0 +# RUN: ld.lld -shared %t1.o --version-script %t.script --undefined-version -o %t1.so -soname verneed1.so.0 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %S/Inputs/verneed2.s -o %t2.o -# RUN: ld.lld -shared %t2.o --version-script %t.script -o %t2.so -soname verneed2.so.0 +# RUN: ld.lld -shared %t2.o --version-script %t.script --undefined-version -o %t2.so -soname verneed2.so.0 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: ld.lld --hash-style=sysv %t.o %t1.so %t2.so -o %t diff --git a/lld/test/ELF/version-script-extern-undefined.s b/lld/test/ELF/version-script-extern-undefined.s index 58b4d2e0fe53f..38114229e0ce3 100644 --- a/lld/test/ELF/version-script-extern-undefined.s +++ b/lld/test/ELF/version-script-extern-undefined.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: echo "FOO { global: extern \"C++\" { \"abb(int)\"; }; };" > %t.script -# RUN: ld.lld --version-script %t.script -shared %t.o -o %t.so +# RUN: ld.lld --version-script %t.script --undefined-version -shared %t.o -o %t.so # RUN: llvm-readobj -V %t.so | FileCheck %s # CHECK: VersionSymbols [ diff --git a/lld/test/ELF/version-script-local-preemptible.s b/lld/test/ELF/version-script-local-preemptible.s index ffb16648dc800..033c9459fb56c 100644 --- a/lld/test/ELF/version-script-local-preemptible.s +++ b/lld/test/ELF/version-script-local-preemptible.s @@ -10,7 +10,7 @@ # RUN: echo "{ global: 
main; local: *; };" > %t.script # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o -# RUN: ld.lld %t.o %t.so -o %t -version-script %t.script +# RUN: ld.lld %t.o %t.so -o %t -version-script %t.script --undefined-version # RUN: llvm-readelf -r --symbols %t | FileCheck %s # CHECK: Relocation section '.rela.plt' at offset {{.*}} contains 1 entries: diff --git a/lld/test/ELF/version-script-noundef.s b/lld/test/ELF/version-script-noundef.s index 18916b66f064e..b99fb1779f6eb 100644 --- a/lld/test/ELF/version-script-noundef.s +++ b/lld/test/ELF/version-script-noundef.s @@ -2,7 +2,8 @@ # RUN: echo "VERSION_1.0 { global: bar; };" > %t.script # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o -# RUN: ld.lld --version-script %t.script -shared %t.o -o /dev/null --fatal-warnings +# RUN: not ld.lld --version-script %t.script -shared %t.o -o /dev/null \ +# RUN: --fatal-warnings 2>&1 | FileCheck -check-prefix=ERR1 %s # RUN: ld.lld --version-script %t.script -shared --undefined-version %t.o -o %t.so # RUN: not ld.lld --version-script %t.script -shared --no-undefined-version \ # RUN: %t.o -o %t.so 2>&1 | FileCheck -check-prefix=ERR1 %s diff --git a/lld/test/ELF/version-script-reassign.s b/lld/test/ELF/version-script-reassign.s index 2ed5b15faceda..371390019a4dd 100644 --- a/lld/test/ELF/version-script-reassign.s +++ b/lld/test/ELF/version-script-reassign.s @@ -24,7 +24,7 @@ # RUN: llvm-readelf --dyn-syms %t.so | FileCheck --check-prefix=V1-SYM %s # RUN: ld.lld -shared %t.o --version-script %t1.ver --version-script %t2w.ver \ -# RUN: -o %t.so --fatal-warnings +# RUN: -o %t.so --fatal-warnings --undefined-version # RUN: llvm-readelf --dyn-syms %t.so | FileCheck --check-prefix=V1-SYM %s # LOCAL: warning: attempt to reassign symbol 'foo' of VER_NDX_LOCAL to version 'V1' From 432a7e284440c719637fe34b972c96c9af0e01f1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Nov 2022 22:15:28 +0000 Subject: [PATCH 474/516] [libomp] Add 
-Wl,--undefined-version workaround Issue #58858: when linking libomp.so, exports_so.txt has non-existent/undefined symbols which cause errors to ld --no-undefined-version. Use -Wl,--undefined-version if available (gold, ld.lld, future GNU ld 2.40). --- openmp/runtime/cmake/LibompHandleFlags.cmake | 1 + openmp/runtime/cmake/config-ix.cmake | 1 + 2 files changed, 2 insertions(+) diff --git a/openmp/runtime/cmake/LibompHandleFlags.cmake b/openmp/runtime/cmake/LibompHandleFlags.cmake index aee9038520b8a..33847b5545fbc 100644 --- a/openmp/runtime/cmake/LibompHandleFlags.cmake +++ b/openmp/runtime/cmake/LibompHandleFlags.cmake @@ -100,6 +100,7 @@ function(libomp_get_ldflags ldflags) libomp_append(ldflags_local -Wl,--warn-shared-textrel LIBOMP_HAVE_WARN_SHARED_TEXTREL_FLAG) libomp_append(ldflags_local -Wl,--as-needed LIBOMP_HAVE_AS_NEEDED_FLAG) libomp_append(ldflags_local "-Wl,--version-script=${LIBOMP_SRC_DIR}/exports_so.txt" LIBOMP_HAVE_VERSION_SCRIPT_FLAG) + libomp_append(ldflags_local "-Wl,--undefined-version" LIBOMP_HAVE_UNDEFINED_VERSION_FLAG) # FIXME issue #58858 libomp_append(ldflags_local -static-libgcc LIBOMP_HAVE_STATIC_LIBGCC_FLAG) libomp_append(ldflags_local -Wl,-z,noexecstack LIBOMP_HAVE_Z_NOEXECSTACK_FLAG) libomp_append(ldflags_local -no-intel-extensions LIBOMP_HAVE_NO_INTEL_EXTENSIONS_FLAG) diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index a57cbf9d2ef59..ac6c81670211b 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -133,6 +133,7 @@ elseif(NOT APPLE) libomp_check_linker_flag(-Wl,--warn-shared-textrel LIBOMP_HAVE_WARN_SHARED_TEXTREL_FLAG) libomp_check_linker_flag(-Wl,--as-needed LIBOMP_HAVE_AS_NEEDED_FLAG) libomp_check_linker_flag("-Wl,--version-script=${LIBOMP_SRC_DIR}/exports_so.txt" LIBOMP_HAVE_VERSION_SCRIPT_FLAG) + libomp_check_linker_flag("-Wl,--undefined-version" LIBOMP_HAVE_UNDEFINED_VERSION_FLAG) # FIXME issue #58858 libomp_check_linker_flag(-static-libgcc 
LIBOMP_HAVE_STATIC_LIBGCC_FLAG) libomp_check_linker_flag(-Wl,-z,noexecstack LIBOMP_HAVE_Z_NOEXECSTACK_FLAG) endif() From 79f9f1f8e33523ff64a34520f8a35dd819a74154 Mon Sep 17 00:00:00 2001 From: Daniel Bertalan Date: Sat, 5 Nov 2022 16:29:11 +0100 Subject: [PATCH 475/516] [lld-macho] Ensure that chained fixups data comes first in __LINKEDIT libstuff-based tools (e.g. `codesign` and `strip`) require `__chainfixups` to be the first section in `__LINKEDIT`, and print a "file not in an order that can be processed" error message if that is not the case. Differential Revision: https://reviews.llvm.org/D137492 --- lld/MachO/OutputSegment.cpp | 1 + lld/test/MachO/linkedit-contiguity.s | 41 ++++++++++++++++++---------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp index be541d29f19e8..3b28dfd306c38 100644 --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -125,6 +125,7 @@ static int sectionOrder(OutputSection *osec) { } } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) + .Case(section_names::chainFixups, -11) .Case(section_names::rebase, -10) .Case(section_names::binding, -9) .Case(section_names::weakBinding, -8) diff --git a/lld/test/MachO/linkedit-contiguity.s b/lld/test/MachO/linkedit-contiguity.s index 9cf3b500b922b..e85b312d8add2 100644 --- a/lld/test/MachO/linkedit-contiguity.s +++ b/lld/test/MachO/linkedit-contiguity.s @@ -2,7 +2,7 @@ # RUN: rm -rf %t; split-file %s %t ## codesign requires that each section in __LINKEDIT ends where the next one -## starts. This test enforces that invariant. +## starts and that they follow a certain order. This test enforces that invariant. ## It also checks that the last section in __LINKEDIT covers the last byte of ## the segment. 
@@ -10,9 +10,12 @@ # RUN: %lld %t/foo.o -dylib -o %t/libfoo.dylib # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o + # RUN: %lld -lSystem -adhoc_codesign -o %t/test %t/libfoo.dylib %t/test.o +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck --check-prefixes=CHECK,OPCODE %s -# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s +# RUN: %lld -lSystem -adhoc_codesign -fixup_chains -o %t/chained_test %t/libfoo.dylib %t/test.o +# RUN: llvm-objdump --macho --all-headers %t/chained_test | FileCheck --check-prefixes=CHECK,CHAINED %s # CHECK: segname __LINKEDIT # CHECK-NEXT: vmaddr @@ -20,18 +23,28 @@ # CHECK-NEXT: fileoff [[#LINKEDIT_OFF:]] # CHECK-NEXT: filesize [[#LINKEDIT_SIZE:]] -# CHECK: cmd LC_DYLD_INFO_ONLY -# CHECK-NEXT: cmdsize 48 -# CHECK-NEXT: rebase_off [[#REBASE_OFF:]] -# CHECK-NEXT: rebase_size [[#REBASE_SIZE:]] -# CHECK-NEXT: bind_off [[#BIND_OFF: REBASE_OFF + REBASE_SIZE]] -# CHECK-NEXT: bind_size [[#BIND_SIZE:]] -# CHECK-NEXT: weak_bind_off [[#WEAK_OFF: BIND_OFF + BIND_SIZE]] -# CHECK-NEXT: weak_bind_size [[#WEAK_SIZE:]] -# CHECK-NEXT: lazy_bind_off [[#LAZY_OFF: WEAK_OFF + WEAK_SIZE]] -# CHECK-NEXT: lazy_bind_size [[#LAZY_SIZE:]] -# CHECK-NEXT: export_off [[#EXPORT_OFF: LAZY_OFF + LAZY_SIZE]] -# CHECK-NEXT: export_size [[#EXPORT_SIZE:]] +# OPCODE: cmd LC_DYLD_INFO_ONLY +# OPCODE-NEXT: cmdsize 48 +# OPCODE-NEXT: rebase_off [[#REBASE_OFF:]] +# OPCODE-NEXT: rebase_size [[#REBASE_SIZE:]] +# OPCODE-NEXT: bind_off [[#BIND_OFF: REBASE_OFF + REBASE_SIZE]] +# OPCODE-NEXT: bind_size [[#BIND_SIZE:]] +# OPCODE-NEXT: weak_bind_off [[#WEAK_OFF: BIND_OFF + BIND_SIZE]] +# OPCODE-NEXT: weak_bind_size [[#WEAK_SIZE:]] +# OPCODE-NEXT: lazy_bind_off [[#LAZY_OFF: WEAK_OFF + WEAK_SIZE]] +# OPCODE-NEXT: lazy_bind_size [[#LAZY_SIZE:]] +# OPCODE-NEXT: export_off [[#EXPORT_OFF: LAZY_OFF + LAZY_SIZE]] +# OPCODE-NEXT: export_size [[#EXPORT_SIZE:]] + +# CHAINED: cmd LC_DYLD_CHAINED_FIXUPS +# CHAINED-NEXT: cmdsize +# CHAINED-NEXT: 
dataoff [[#FIXUPS_OFF: LINKEDIT_OFF]] +# CHAINED-NEXT: datasize [[#FIXUPS_SIZE:]] + +# CHAINED: cmd LC_DYLD_EXPORTS_TRIE +# CHAINED-NEXT: cmdsize +# CHAINED-NEXT: dataoff [[#EXPORT_OFF: FIXUPS_OFF + FIXUPS_SIZE]] +# CHAINED-NEXT: datasize [[#EXPORT_SIZE:]] # CHECK: cmd LC_FUNCTION_STARTS # CHECK-NEXT: cmdsize From afa22c563f12f22da93b8f172d18c7991392d071 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Sun, 25 Sep 2022 01:07:58 -0700 Subject: [PATCH 476/516] [clangd] Pass the entire tooling::CompileCommand to CommandMangler This gives CommandMangler access to other fields of tooling::CompileCommand as well, e.g. Directory. Differential Revision: https://reviews.llvm.org/D133756 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 2 +- clang-tools-extra/clangd/CompileCommands.cpp | 16 +- clang-tools-extra/clangd/CompileCommands.h | 9 +- .../clangd/GlobalCompilationDatabase.cpp | 12 +- .../clangd/GlobalCompilationDatabase.h | 10 +- .../clangd/indexer/IndexerMain.cpp | 9 +- clang-tools-extra/clangd/tool/Check.cpp | 3 +- .../clangd/unittests/BackgroundIndexTests.cpp | 2 +- .../clangd/unittests/ClangdTests.cpp | 2 +- .../clangd/unittests/CompileCommandsTests.cpp | 152 ++++++++++-------- .../GlobalCompilationDatabaseTests.cpp | 6 +- clang-tools-extra/clangd/unittests/TestTU.cpp | 2 +- 12 files changed, 121 insertions(+), 104 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 8b94d0d84fd4c..b84d1c706d406 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -509,7 +509,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, if (Opts.ResourceDir) Mangler.ResourceDir = *Opts.ResourceDir; CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags, - tooling::ArgumentsAdjuster(std::move(Mangler))); + std::move(Mangler)); { // Switch caller's context with LSPServer's background context. 
Since we // rather want to propagate information from LSPServer's context into the diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 02acc92265ffa..47e92bdf4ec37 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -13,7 +13,6 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInvocation.h" -#include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -195,8 +194,9 @@ CommandMangler CommandMangler::detect() { CommandMangler CommandMangler::forTests() { return CommandMangler(); } -void CommandMangler::adjust(std::vector &Cmd, - llvm::StringRef File) const { +void CommandMangler::operator()(tooling::CompileCommand &Command, + llvm::StringRef File) const { + std::vector &Cmd = Command.CommandLine; trace::Span S("AdjustCompileFlags"); // Most of the modifications below assumes the Cmd starts with a driver name. // We might consider injecting a generic driver name like "cc" or "c++", but @@ -340,16 +340,6 @@ void CommandMangler::adjust(std::vector &Cmd, } } -CommandMangler::operator clang::tooling::ArgumentsAdjuster() && { - // ArgumentsAdjuster is a std::function and so must be copyable. 
- return [Mangler = std::make_shared(std::move(*this))]( - const std::vector &Args, llvm::StringRef File) { - auto Result = Args; - Mangler->adjust(Result, File); - return Result; - }; -} - // ArgStripper implementation namespace { diff --git a/clang-tools-extra/clangd/CompileCommands.h b/clang-tools-extra/clangd/CompileCommands.h index 1cf30b7ae55d7..2139b0602809e 100644 --- a/clang-tools-extra/clangd/CompileCommands.h +++ b/clang-tools-extra/clangd/CompileCommands.h @@ -8,8 +8,8 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H +#include "GlobalCompilationDatabase.h" #include "support/Threading.h" -#include "clang/Tooling/ArgumentsAdjusters.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include @@ -42,11 +42,14 @@ struct CommandMangler { // - on mac, find clang and isysroot by querying the `xcrun` launcher static CommandMangler detect(); - void adjust(std::vector &Cmd, llvm::StringRef File) const; - explicit operator clang::tooling::ArgumentsAdjuster() &&; + // `Cmd` may describe compilation of a different file, and will be updated + // for parsing `TargetFile`. 
+ void operator()(tooling::CompileCommand &Cmd, + llvm::StringRef TargetFile) const; private: CommandMangler() = default; + Memoize> ResolvedDrivers; Memoize> ResolvedDriversNoFollow; }; diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index 824a7027b4d89..c1c4897430d9d 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -740,8 +740,8 @@ DirectoryBasedGlobalCompilationDatabase::getProjectInfo(PathRef File) const { OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base, std::vector FallbackFlags, - tooling::ArgumentsAdjuster Adjuster) - : DelegatingCDB(Base), ArgsAdjuster(std::move(Adjuster)), + CommandMangler Mangler) + : DelegatingCDB(Base), Mangler(std::move(Mangler)), FallbackFlags(std::move(FallbackFlags)) {} llvm::Optional @@ -757,8 +757,8 @@ OverlayCDB::getCompileCommand(PathRef File) const { Cmd = DelegatingCDB::getCompileCommand(File); if (!Cmd) return llvm::None; - if (ArgsAdjuster) - Cmd->CommandLine = ArgsAdjuster(Cmd->CommandLine, File); + if (Mangler) + Mangler(*Cmd, File); return Cmd; } @@ -767,8 +767,8 @@ tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const { std::lock_guard Lock(Mutex); Cmd.CommandLine.insert(Cmd.CommandLine.end(), FallbackFlags.begin(), FallbackFlags.end()); - if (ArgsAdjuster) - Cmd.CommandLine = ArgsAdjuster(Cmd.CommandLine, File); + if (Mangler) + Mangler(Cmd, File); return Cmd; } diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h index e71e4368f06b3..ae8ef97e1ebd2 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -15,6 +15,7 @@ #include "support/ThreadsafeFS.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/FunctionExtras.h" #include 
"llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include @@ -171,12 +172,17 @@ getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, /// using an in-memory mapping. class OverlayCDB : public DelegatingCDB { public: + // Makes adjustments to a tooling::CompileCommand which will be used to + // process a file (possibly different from the one in the command). + using CommandMangler = llvm::unique_function; + // Base may be null, in which case no entries are inherited. // FallbackFlags are added to the fallback compile command. // Adjuster is applied to all commands, fallback or not. OverlayCDB(const GlobalCompilationDatabase *Base, std::vector FallbackFlags = {}, - tooling::ArgumentsAdjuster Adjuster = nullptr); + CommandMangler Mangler = nullptr); llvm::Optional getCompileCommand(PathRef File) const override; @@ -190,7 +196,7 @@ class OverlayCDB : public DelegatingCDB { private: mutable std::mutex Mutex; llvm::StringMap Commands; /* GUARDED_BY(Mut) */ - tooling::ArgumentsAdjuster ArgsAdjuster; + CommandMangler Mangler; std::vector FallbackFlags; }; diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp index 7393984b984ba..9070582801f21 100644 --- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp +++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp @@ -147,7 +147,14 @@ int main(int argc, const char **argv) { auto Err = Executor->get()->execute( std::make_unique(Data), clang::tooling::ArgumentsAdjuster( - clang::clangd::CommandMangler::detect())); + [Mangler = std::make_shared( + clang::clangd::CommandMangler::detect())]( + const std::vector &Args, llvm::StringRef File) { + clang::tooling::CompileCommand Cmd; + Cmd.CommandLine = Args; + Mangler->operator()(Cmd, File); + return Cmd.CommandLine; + })); if (Err) { clang::clangd::elog("{0}", std::move(Err)); } diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp index 46752e2135639..64ada30c084fa 100644 --- 
a/clang-tools-extra/clangd/tool/Check.cpp +++ b/clang-tools-extra/clangd/tool/Check.cpp @@ -107,8 +107,7 @@ class Checker { if (Opts.ResourceDir) Mangler.ResourceDir = *Opts.ResourceDir; auto CDB = std::make_unique( - BaseCDB.get(), std::vector{}, - tooling::ArgumentsAdjuster(std::move(Mangler))); + BaseCDB.get(), std::vector{}, std::move(Mangler)); if (auto TrueCmd = CDB->getCompileCommand(File)) { Cmd = std::move(*TrueCmd); diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index 8fdc5be68934d..7a1fb9863af1a 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -146,7 +146,7 @@ TEST_F(BackgroundIndexTest, Config) { MemoryShardStorage MSS(Storage, CacheHits); // We need the CommandMangler, because that applies the config we're testing. OverlayCDB CDB(/*Base=*/nullptr, /*FallbackFlags=*/{}, - tooling::ArgumentsAdjuster(CommandMangler::forTests())); + CommandMangler::forTests()); BackgroundIndex Idx( FS, CDB, [&](llvm::StringRef) { return &MSS; }, std::move(Opts)); diff --git a/clang-tools-extra/clangd/unittests/ClangdTests.cpp b/clang-tools-extra/clangd/unittests/ClangdTests.cpp index fa620fda557b8..d3399f4d98e0b 100644 --- a/clang-tools-extra/clangd/unittests/ClangdTests.cpp +++ b/clang-tools-extra/clangd/unittests/ClangdTests.cpp @@ -350,7 +350,7 @@ TEST(ClangdServerTest, RespectsConfig) { Opts.ContextProvider = ClangdServer::createConfiguredContextProvider(&CfgProvider, nullptr); OverlayCDB CDB(/*Base=*/nullptr, /*FallbackFlags=*/{}, - tooling::ArgumentsAdjuster(CommandMangler::forTests())); + CommandMangler::forTests()); MockFS FS; ClangdServer Server(CDB, FS, Opts); // foo.cc sees the expected definition, as FOO is defined. 
diff --git a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp index 28ae6ea01e87a..504487d4e73d1 100644 --- a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp +++ b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp @@ -45,41 +45,47 @@ TEST(CommandMangler, Everything) { Mangler.ClangPath = testPath("fake/clang"); Mangler.ResourceDir = testPath("fake/resources"); Mangler.Sysroot = testPath("fake/sysroot"); - std::vector Cmd = {"clang++", "--", "foo.cc", "bar.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_THAT(Cmd, ElementsAre(testPath("fake/clang++"), - "-resource-dir=" + testPath("fake/resources"), - "-isysroot", testPath("fake/sysroot"), "--", - "foo.cc")); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "--", "foo.cc", "bar.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_THAT(Cmd.CommandLine, + ElementsAre(testPath("fake/clang++"), + "-resource-dir=" + testPath("fake/resources"), + "-isysroot", testPath("fake/sysroot"), "--", + "foo.cc")); } TEST(CommandMangler, FilenameMismatch) { auto Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("clang"); // Our compile flags refer to foo.cc... - std::vector Cmd = {"clang", "foo.cc"}; + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "foo.cc"}; // but we're applying it to foo.h... - Mangler.adjust(Cmd, "foo.h"); + Mangler(Cmd, "foo.h"); // so transferCompileCommand should add -x c++-header to preserve semantics. 
- EXPECT_THAT( - Cmd, ElementsAre(testPath("clang"), "-x", "c++-header", "--", "foo.h")); + EXPECT_THAT(Cmd.CommandLine, ElementsAre(testPath("clang"), "-x", + "c++-header", "--", "foo.h")); } TEST(CommandMangler, ResourceDir) { auto Mangler = CommandMangler::forTests(); Mangler.ResourceDir = testPath("fake/resources"); - std::vector Cmd = {"clang++", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_THAT(Cmd, Contains("-resource-dir=" + testPath("fake/resources"))); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_THAT(Cmd.CommandLine, + Contains("-resource-dir=" + testPath("fake/resources"))); } TEST(CommandMangler, Sysroot) { auto Mangler = CommandMangler::forTests(); Mangler.Sysroot = testPath("fake/sysroot"); - std::vector Cmd = {"clang++", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_THAT(llvm::join(Cmd, " "), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_THAT(llvm::join(Cmd.CommandLine, " "), HasSubstr("-isysroot " + testPath("fake/sysroot"))); } @@ -87,21 +93,22 @@ TEST(CommandMangler, ClangPath) { auto Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("fake/clang"); - std::vector Cmd = {"clang++", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ(testPath("fake/clang++"), Cmd.front()); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ(testPath("fake/clang++"), Cmd.CommandLine.front()); - Cmd = {"unknown-binary", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.front()); + Cmd.CommandLine = {"unknown-binary", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.CommandLine.front()); - Cmd = {testPath("path/clang++"), "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ(testPath("path/clang++"), Cmd.front()); + Cmd.CommandLine = {testPath("path/clang++"), 
"foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ(testPath("path/clang++"), Cmd.CommandLine.front()); - Cmd = {"foo/unknown-binary", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ("foo/unknown-binary", Cmd.front()); + Cmd.CommandLine = {"foo/unknown-binary", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ("foo/unknown-binary", Cmd.CommandLine.front()); } // Only run the PATH/symlink resolving test on unix, we need to fiddle @@ -142,10 +149,11 @@ TEST(CommandMangler, ClangPathResolve) { // Test the case where the driver is an absolute path to a symlink. auto Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("fake/clang"); - std::vector Cmd = {(TempDir + "/bin/foo").str(), "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {(TempDir + "/bin/foo").str(), "foo.cc"}; + Mangler(Cmd, "foo.cc"); // Directory based on resolved symlink, basename preserved. - EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front()); + EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.CommandLine.front()); // Set PATH to point to temp/bin so we can find 'foo' on it. ASSERT_TRUE(::getenv("PATH")); @@ -159,21 +167,22 @@ TEST(CommandMangler, ClangPathResolve) { Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("fake/clang"); // Driver found on PATH. - Cmd = {"foo", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); + Cmd.CommandLine = {"foo", "foo.cc"}; + Mangler(Cmd, "foo.cc"); // Found the symlink and resolved the path as above. - EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front()); + EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.CommandLine.front()); // Symlink not resolved with -no-canonical-prefixes. 
- Cmd = {"foo", "-no-canonical-prefixes", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ((TempDir + "/bin/foo").str(), Cmd.front()); + Cmd.CommandLine = {"foo", "-no-canonical-prefixes", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ((TempDir + "/bin/foo").str(), Cmd.CommandLine.front()); } #endif TEST(CommandMangler, ConfigEdits) { auto Mangler = CommandMangler::forTests(); - std::vector Cmd = {"clang++", "foo.cc"}; + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; { Config Cfg; Cfg.CompileFlags.Edits.push_back([](std::vector &Argv) { @@ -185,11 +194,11 @@ TEST(CommandMangler, ConfigEdits) { Argv = tooling::getInsertArgumentAdjuster("--hello")(Argv, ""); }); WithContextValue WithConfig(Config::Key, std::move(Cfg)); - Mangler.adjust(Cmd, "foo.cc"); + Mangler(Cmd, "foo.cc"); } // Edits are applied in given order and before other mangling and they always // go before filename. - EXPECT_THAT(Cmd, ElementsAre(_, "--hello", "--", "FOO.CC")); + EXPECT_THAT(Cmd.CommandLine, ElementsAre(_, "--hello", "--", "FOO.CC")); } static std::string strip(llvm::StringRef Arg, llvm::StringRef Argv) { @@ -363,70 +372,75 @@ TEST(PrintArgvTest, All) { TEST(CommandMangler, InputsAfterDashDash) { const auto Mangler = CommandMangler::forTests(); { - std::vector Args = {"clang", "/Users/foo.cc"}; - Mangler.adjust(Args, "/Users/foo.cc"); - EXPECT_THAT(llvm::makeArrayRef(Args).take_back(2), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "/Users/foo.cc"}; + Mangler(Cmd, "/Users/foo.cc"); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).take_back(2), ElementsAre("--", "/Users/foo.cc")); - EXPECT_THAT(llvm::makeArrayRef(Args).drop_back(2), + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).drop_back(2), Not(Contains("/Users/foo.cc"))); } // In CL mode /U triggers an undef operation, hence `/Users/foo.cc` shouldn't // be interpreted as a file. 
{ - std::vector Args = {"clang", "--driver-mode=cl", "bar.cc", - "/Users/foo.cc"}; - Mangler.adjust(Args, "bar.cc"); - EXPECT_THAT(llvm::makeArrayRef(Args).take_back(2), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "--driver-mode=cl", "bar.cc", "/Users/foo.cc"}; + Mangler(Cmd, "bar.cc"); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).take_back(2), ElementsAre("--", "bar.cc")); - EXPECT_THAT(llvm::makeArrayRef(Args).drop_back(2), Not(Contains("bar.cc"))); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).drop_back(2), + Not(Contains("bar.cc"))); } // All inputs but the main file is dropped. { - std::vector Args = {"clang", "foo.cc", "bar.cc"}; - Mangler.adjust(Args, "baz.cc"); - EXPECT_THAT(llvm::makeArrayRef(Args).take_back(2), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "foo.cc", "bar.cc"}; + Mangler(Cmd, "baz.cc"); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).take_back(2), ElementsAre("--", "baz.cc")); EXPECT_THAT( - llvm::makeArrayRef(Args).drop_back(2), + llvm::makeArrayRef(Cmd.CommandLine).drop_back(2), testing::AllOf(Not(Contains("foo.cc")), Not(Contains("bar.cc")))); } } TEST(CommandMangler, StripsMultipleArch) { const auto Mangler = CommandMangler::forTests(); - std::vector Args = {"clang", "-arch", "foo", - "-arch", "bar", "/Users/foo.cc"}; - Mangler.adjust(Args, "/Users/foo.cc"); - EXPECT_EQ( - llvm::count_if(Args, [](llvm::StringRef Arg) { return Arg == "-arch"; }), - 0); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "-arch", "foo", "-arch", "bar", "/Users/foo.cc"}; + Mangler(Cmd, "/Users/foo.cc"); + EXPECT_EQ(llvm::count_if(Cmd.CommandLine, + [](llvm::StringRef Arg) { return Arg == "-arch"; }), + 0); // Single arch option is preserved. 
- Args = {"clang", "-arch", "foo", "/Users/foo.cc"}; - Mangler.adjust(Args, "/Users/foo.cc"); - EXPECT_EQ( - llvm::count_if(Args, [](llvm::StringRef Arg) { return Arg == "-arch"; }), - 1); + Cmd.CommandLine = {"clang", "-arch", "foo", "/Users/foo.cc"}; + Mangler(Cmd, "/Users/foo.cc"); + EXPECT_EQ(llvm::count_if(Cmd.CommandLine, + [](llvm::StringRef Arg) { return Arg == "-arch"; }), + 1); } TEST(CommandMangler, EmptyArgs) { const auto Mangler = CommandMangler::forTests(); - std::vector Args = {}; + tooling::CompileCommand Cmd; + Cmd.CommandLine = {}; // Make sure we don't crash. - Mangler.adjust(Args, "foo.cc"); + Mangler(Cmd, "foo.cc"); } TEST(CommandMangler, PathsAsPositional) { const auto Mangler = CommandMangler::forTests(); - std::vector Args = { + tooling::CompileCommand Cmd; + Cmd.CommandLine = { "clang", "--driver-mode=cl", "-I", "foo", }; // Make sure we don't crash. - Mangler.adjust(Args, "a.cc"); - EXPECT_THAT(Args, Contains("foo")); + Mangler(Cmd, "a.cc"); + EXPECT_THAT(Cmd.CommandLine, Contains("foo")); } } // namespace } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp index 554cb0484a071..22ee0921b6552 100644 --- a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -138,11 +138,9 @@ TEST_F(OverlayCDBTest, Watch) { TEST_F(OverlayCDBTest, Adjustments) { OverlayCDB CDB(Base.get(), {"-DFallback"}, - [](const std::vector &Cmd, llvm::StringRef File) { - auto Ret = Cmd; - Ret.push_back( + [](tooling::CompileCommand &Cmd, llvm::StringRef File) { + Cmd.CommandLine.push_back( ("-DAdjust_" + llvm::sys::path::filename(File)).str()); - return Ret; }); // Command from underlying gets adjusted. 
auto Cmd = *CDB.getCompileCommand(testPath("foo.cc")); diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp index 03f1cd77191d2..761d3ca60a1a1 100644 --- a/clang-tools-extra/clangd/unittests/TestTU.cpp +++ b/clang-tools-extra/clangd/unittests/TestTU.cpp @@ -64,7 +64,7 @@ ParseInputs TestTU::inputs(MockFS &FS) const { Argv.push_back(FullFilename); auto Mangler = CommandMangler::forTests(); - Mangler.adjust(Inputs.CompileCommand.CommandLine, FullFilename); + Mangler(Inputs.CompileCommand, FullFilename); Inputs.CompileCommand.Filename = FullFilename; Inputs.CompileCommand.Directory = testRoot(); Inputs.Contents = Code; From 68e230aa29f71ed840a0ea9c0be97c8c6ead1c69 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Mon, 12 Sep 2022 04:29:29 -0400 Subject: [PATCH 477/516] [clangd] Perform system include extraction inside CommandMangler It needs to run after edits from config files are applied to the compile command (because the config file may specify the compiler), and before resolveDriver() runs at the end of CommandMangler. As part of this change, QueryDriverDatabase is renamed to SystemIncludeExtractor and is no longer a GlobalCompilationDatabase. 
Fixes https://github.com/clangd/clangd/issues/1089 Fixes https://github.com/clangd/clangd/issues/1173 Fixes https://github.com/clangd/clangd/issues/1263 Differential Revision: https://reviews.llvm.org/D133757 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 5 ++- clang-tools-extra/clangd/CompileCommands.cpp | 11 +++++ clang-tools-extra/clangd/CompileCommands.h | 1 + .../clangd/GlobalCompilationDatabase.h | 11 ++--- .../clangd/QueryDriverDatabase.cpp | 44 ++++++++----------- .../clangd/test/system-include-extractor.test | 19 +++++++- clang-tools-extra/clangd/tool/Check.cpp | 4 +- 7 files changed, 59 insertions(+), 36 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index b84d1c706d406..01cd178c5b35c 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -502,10 +502,10 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, CDBOpts.ContextProvider = Opts.ContextProvider; BaseCDB = std::make_unique(CDBOpts); - BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), - std::move(BaseCDB)); } auto Mangler = CommandMangler::detect(); + Mangler.SystemIncludeExtractor = + getSystemIncludeExtractor(llvm::makeArrayRef(Opts.QueryDriverGlobs)); if (Opts.ResourceDir) Mangler.ResourceDir = *Opts.ResourceDir; CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags, @@ -1815,5 +1815,6 @@ void ClangdLSPServer::onSemanticsMaybeChanged(PathRef File) { }); } } + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 47e92bdf4ec37..e84eb0aa30328 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -301,6 +301,17 @@ void CommandMangler::operator()(tooling::CompileCommand &Command, for (auto &Edit : Config::current().CompileFlags.Edits) Edit(Cmd); + // The 
system include extractor needs to run: + // - AFTER transferCompileCommand(), because the -x flag it adds may be + // necessary for the system include extractor to identify the file type + // - AFTER applying CompileFlags.Edits, because the name of the compiler + // that needs to be invoked may come from the CompileFlags->Compiler key + // - BEFORE resolveDriver() because that can mess up the driver path, + // e.g. changing gcc to /path/to/clang/bin/gcc + if (SystemIncludeExtractor) { + SystemIncludeExtractor(Command, File); + } + // Check whether the flag exists, either as -flag or -flag=* auto Has = [&](llvm::StringRef Flag) { for (llvm::StringRef Arg : Cmd) { diff --git a/clang-tools-extra/clangd/CompileCommands.h b/clang-tools-extra/clangd/CompileCommands.h index 2139b0602809e..3cf41afd4ccf1 100644 --- a/clang-tools-extra/clangd/CompileCommands.h +++ b/clang-tools-extra/clangd/CompileCommands.h @@ -32,6 +32,7 @@ struct CommandMangler { llvm::Optional ResourceDir; // Root for searching for standard library (passed to -isysroot). llvm::Optional Sysroot; + SystemIncludeExtractorFn SystemIncludeExtractor; // A command-mangler that doesn't know anything about the system. // This is hermetic for unit-tests, but won't work well in production. diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h index ae8ef97e1ebd2..c0d751f82f9bb 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -162,11 +162,12 @@ class DirectoryBasedGlobalCompilationDatabase }; /// Extracts system include search path from drivers matching QueryDriverGlobs -/// and adds them to the compile flags. Base may not be nullptr. -/// Returns Base when \p QueryDriverGlobs is empty. -std::unique_ptr -getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, - std::unique_ptr Base); +/// and adds them to the compile flags. 
+/// Returns null when \p QueryDriverGlobs is empty. +using SystemIncludeExtractorFn = llvm::unique_function; +SystemIncludeExtractorFn +getSystemIncludeExtractor(llvm::ArrayRef QueryDriverGlobs); /// Wraps another compilation database, and supports overriding the commands /// using an in-memory mapping. diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp index c36fb4f042a9f..c5c5c84b05fbe 100644 --- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp +++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp @@ -315,24 +315,20 @@ llvm::Regex convertGlobsToRegex(llvm::ArrayRef Globs) { /// Extracts system includes from a trusted driver by parsing the output of /// include search path and appends them to the commands coming from underlying /// compilation database. -class QueryDriverDatabase : public DelegatingCDB { +class SystemIncludeExtractor { public: - QueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, - std::unique_ptr Base) - : DelegatingCDB(std::move(Base)), - QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)) {} + SystemIncludeExtractor(llvm::ArrayRef QueryDriverGlobs) + : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)) {} - llvm::Optional - getCompileCommand(PathRef File) const override { - auto Cmd = DelegatingCDB::getCompileCommand(File); - if (!Cmd || Cmd->CommandLine.empty()) - return Cmd; + void operator()(tooling::CompileCommand &Cmd, llvm::StringRef File) const { + if (Cmd.CommandLine.empty()) + return; llvm::StringRef Lang; - for (size_t I = 0, E = Cmd->CommandLine.size(); I < E; ++I) { - llvm::StringRef Arg = Cmd->CommandLine[I]; + for (size_t I = 0, E = Cmd.CommandLine.size(); I < E; ++I) { + llvm::StringRef Arg = Cmd.CommandLine[I]; if (Arg == "-x" && I + 1 < E) - Lang = Cmd->CommandLine[I + 1]; + Lang = Cmd.CommandLine[I + 1]; else if (Arg.startswith("-x")) Lang = Arg.drop_front(2).trim(); } @@ -341,26 +337,25 @@ class QueryDriverDatabase : public DelegatingCDB { auto Type = 
driver::types::lookupTypeForExtension(Ext); if (Type == driver::types::TY_INVALID) { elog("System include extraction: invalid file type for {0}", Ext); - return Cmd; + return; } Lang = driver::types::getTypeName(Type); } - llvm::SmallString<128> Driver(Cmd->CommandLine.front()); + llvm::SmallString<128> Driver(Cmd.CommandLine.front()); if (llvm::any_of(Driver, [](char C) { return llvm::sys::path::is_separator(C); })) // Driver is a not a single executable name but instead a path (either // relative or absolute). - llvm::sys::fs::make_absolute(Cmd->Directory, Driver); + llvm::sys::fs::make_absolute(Cmd.Directory, Driver); if (auto Info = QueriedDrivers.get(/*Key=*/(Driver + ":" + Lang).str(), [&] { return extractSystemIncludesAndTarget( - Driver, Lang, Cmd->CommandLine, QueryDriverRegex); + Driver, Lang, Cmd.CommandLine, QueryDriverRegex); })) { - setTarget(addSystemIncludes(*Cmd, Info->SystemIncludes), Info->Target); + setTarget(addSystemIncludes(Cmd, Info->SystemIncludes), Info->Target); } - return Cmd; } private: @@ -370,14 +365,11 @@ class QueryDriverDatabase : public DelegatingCDB { }; } // namespace -std::unique_ptr -getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, - std::unique_ptr Base) { - assert(Base && "Null base to SystemIncludeExtractor"); +SystemIncludeExtractorFn +getSystemIncludeExtractor(llvm::ArrayRef QueryDriverGlobs) { if (QueryDriverGlobs.empty()) - return Base; - return std::make_unique(QueryDriverGlobs, - std::move(Base)); + return nullptr; + return SystemIncludeExtractor(QueryDriverGlobs); } } // namespace clangd diff --git a/clang-tools-extra/clangd/test/system-include-extractor.test b/clang-tools-extra/clangd/test/system-include-extractor.test index b109aa67aad1c..ba6aaf6efb9de 100644 --- a/clang-tools-extra/clangd/test/system-include-extractor.test +++ b/clang-tools-extra/clangd/test/system-include-extractor.test @@ -40,7 +40,9 @@ # RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %t.test.1 > %t.test # Bless the mock driver 
we've just created so that clangd can execute it. -# RUN: clangd -lit-test -query-driver="**.test,**.sh" < %t.test | FileCheck -strict-whitespace %t.test +# Note: include clangd's stderr in the FileCheck input with "2>&1" so that we +# can match output lines like "ASTWorker building file" +# RUN: clangd -lit-test -query-driver="**.test,**.sh" < %t.test 2>&1 | FileCheck -strict-whitespace %t.test {"jsonrpc":"2.0","id":0,"method":"initialize","params":{}} --- { @@ -55,10 +57,25 @@ } } } +# Look for the "ASTWorker building file" line so that the subsequent diagnostics +# that are matches are for the C++ source file and not a config file. +# CHECK: ASTWorker building file # CHECK: "method": "textDocument/publishDiagnostics", # CHECK-NEXT: "params": { # CHECK-NEXT: "diagnostics": [], +# CHECK-NEXT: "uri": "file://INPUT_DIR/the-file.cpp", --- {"jsonrpc":"2.0","id":10000,"method":"shutdown"} --- {"jsonrpc":"2.0","method":"exit"} + +# Generate a different compile_commands.json which does not point to the mock driver +# RUN: echo '[{"directory": "%/t.dir", "command": "gcc the-file.cpp -nostdinc --sysroot /my/sysroot/path -isysroot=/isysroot", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json + +# Generate a clangd config file which points to the mock driver instead +# RUN: echo 'CompileFlags:' > %t.dir/.clangd +# RUN: echo ' Compiler: my_driver.sh' >> %t.dir/.clangd + +# Run clangd a second time, to make sure it picks up the driver name from the config file +# Note, we need to pass -enable-config because -lit-test otherwise disables it +# RUN: clangd -lit-test -enable-config -query-driver="**.test,**.sh" < %t.test 2>&1 | FileCheck -strict-whitespace %t.test diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp index 64ada30c084fa..d216c9d08e89a 100644 --- a/clang-tools-extra/clangd/tool/Check.cpp +++ b/clang-tools-extra/clangd/tool/Check.cpp @@ -101,9 +101,9 @@ class Checker { 
Config::current().CompileFlags.CDBSearch.FixedCDBPath; std::unique_ptr BaseCDB = std::make_unique(CDBOpts); - BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), - std::move(BaseCDB)); auto Mangler = CommandMangler::detect(); + Mangler.SystemIncludeExtractor = + getSystemIncludeExtractor(llvm::makeArrayRef(Opts.QueryDriverGlobs)); if (Opts.ResourceDir) Mangler.ResourceDir = *Opts.ResourceDir; auto CDB = std::make_unique( From 428ac8f3a0f9572fcff03c2fae62c4ae8420a0df Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Fri, 4 Nov 2022 04:39:46 -0400 Subject: [PATCH 478/516] [clangd] Rename QueryDriverDatabase.cpp to SystemIncludeExtractor.cpp Differential Revision: https://reviews.llvm.org/D137401 --- clang-tools-extra/clangd/CMakeLists.txt | 2 +- .../{QueryDriverDatabase.cpp => SystemIncludeExtractor.cpp} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename clang-tools-extra/clangd/{QueryDriverDatabase.cpp => SystemIncludeExtractor.cpp} (98%) diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index df8ad666e2da0..dec115a64a59e 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -97,7 +97,7 @@ add_clang_library(clangDaemon SemanticHighlighting.cpp SemanticSelection.cpp SourceCode.cpp - QueryDriverDatabase.cpp + SystemIncludeExtractor.cpp TidyProvider.cpp TUScheduler.cpp URI.cpp diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp similarity index 98% rename from clang-tools-extra/clangd/QueryDriverDatabase.cpp rename to clang-tools-extra/clangd/SystemIncludeExtractor.cpp index c5c5c84b05fbe..7cfbd3dbf7318 100644 --- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp +++ b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp @@ -1,4 +1,4 @@ -//===--- QueryDriverDatabase.cpp ---------------------------------*- C++-*-===// +//===--- SystemIncludeExtractor.cpp 
------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -344,7 +344,7 @@ class SystemIncludeExtractor { llvm::SmallString<128> Driver(Cmd.CommandLine.front()); if (llvm::any_of(Driver, - [](char C) { return llvm::sys::path::is_separator(C); })) + [](char C) { return llvm::sys::path::is_separator(C); })) // Driver is a not a single executable name but instead a path (either // relative or absolute). llvm::sys::fs::make_absolute(Cmd.Directory, Driver); From f64802e8d3e9db299cad913ffcb734c8d35dc5f0 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Wed, 2 Nov 2022 09:25:59 -0700 Subject: [PATCH 479/516] [Clang][AArch64][Darwin] Enable GlobalISel by default for Darwin ARM64 platforms. Differential Revision: https://reviews.llvm.org/D137269 --- clang/lib/Driver/ToolChains/Clang.cpp | 20 +++++++++++++++++--- clang/lib/Driver/ToolChains/Darwin.cpp | 9 +++++---- clang/test/Driver/global-isel.c | 2 ++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index afb92dae27d35..bffc8dc611605 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7209,15 +7209,29 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (SplitLTOUnit) CmdArgs.push_back("-fsplit-lto-unit"); - if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel, - options::OPT_fno_global_isel)) { + A = Args.getLastArg(options::OPT_fglobal_isel, options::OPT_fno_global_isel); + // If a configuration is fully supported, we don't issue any warnings or + // remarks. 
+ bool IsFullySupported = getToolChain().getTriple().isOSDarwin() && + Triple.getArch() == llvm::Triple::aarch64; + if (IsFullySupported) { + if (A && A->getOption().matches(options::OPT_fno_global_isel)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-global-isel=0"); + } else { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-global-isel=1"); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-global-isel-abort=0"); + } + } else if (A) { CmdArgs.push_back("-mllvm"); if (A->getOption().matches(options::OPT_fglobal_isel)) { CmdArgs.push_back("-global-isel=1"); // GISel is on by default on AArch64 -O0, so don't bother adding // the fallback remarks for it. Other combinations will add a warning of - // some kind. + // some kind, unless we're on Darwin. bool IsArchSupported = Triple.getArch() == llvm::Triple::aarch64; bool IsOptLevelSupported = false; diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 39f459e9ef652..661764e6eb00b 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -381,10 +381,11 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain); } - // If GlobalISel is enabled, pass it through to LLVM. - if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel, - options::OPT_fno_global_isel)) { - if (A->getOption().matches(options::OPT_fglobal_isel)) { + // GlobalISel is enabled by default on AArch64 Darwin. + if (getToolChain().getArch() == llvm::Triple::aarch64) { + Arg *A = Args.getLastArg(options::OPT_fglobal_isel, + options::OPT_fno_global_isel); + if (!A || !A->getOption().matches(options::OPT_fno_global_isel)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-global-isel"); // Disable abort and fall back to SDAG silently. 
diff --git a/clang/test/Driver/global-isel.c b/clang/test/Driver/global-isel.c index 66f196b03c1ea..0d3bd2b2fe262 100644 --- a/clang/test/Driver/global-isel.c +++ b/clang/test/Driver/global-isel.c @@ -6,6 +6,7 @@ // RUN: %clang -target aarch64 -fglobal-isel -S %s -### 2>&1 | FileCheck --check-prefix=ARM64-DEFAULT %s // RUN: %clang -target aarch64 -fglobal-isel -S -O0 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O0 %s // RUN: %clang -target aarch64 -fglobal-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O2 %s +// RUN: %clang -arch arm64 -fglobal-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefixes=DARWIN-ARM64-O2,ENABLED %s // RUN: %clang -target aarch64 -fglobal-isel -Wno-global-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O2-NOWARN %s // RUN: %clang -target x86_64 -fglobal-isel -S %s -### 2>&1 | FileCheck --check-prefix=X86_64 %s @@ -27,6 +28,7 @@ // ARM64-DEFAULT-NOT: warning: -fglobal-isel // ARM64-DEFAULT-NOT: "-global-isel-abort=2" // ARM64-O0-NOT: warning: -fglobal-isel +// DARWIN-ARM64-O2-NOT: warning: -fglobal-isel // ARM64-O2: warning: -fglobal-isel support is incomplete for this architecture at the current optimization level // ARM64-O2: "-mllvm" "-global-isel-abort=2" // ARM64-O2-NOWARN-NOT: warning: -fglobal-isel From 41ce74e6e983f523d44d3a80be5ae778c35df85a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 7 Nov 2022 22:55:24 +0000 Subject: [PATCH 480/516] [Clang][Sema] Add -Wincompatible-function-pointer-types-strict Clang supports indirect call Control-Flow Integrity (CFI) sanitizers (e.g. -fsanitize=cfi-icall), which enforce an exact type match between a function pointer and the target function. Unfortunately, Clang doesn't provide diagnostics that help developers avoid function pointer assignments that can lead to runtime CFI failures. -Wincompatible-function-pointer-types doesn't warn about enum to integer mismatches if the types are otherwise compatible, for example, which isn't sufficient with CFI. 
Add -Wincompatible-function-pointer-types-strict, which checks for a stricter function type compatibility in assignments and helps warn about assignments that can potentially lead to CFI failures. Reviewed By: aaron.ballman, nickdesaulniers Differential Revision: https://reviews.llvm.org/D136790 --- clang/docs/ReleaseNotes.rst | 13 +++++++----- .../clang/Basic/DiagnosticSemaKinds.td | 3 +++ clang/include/clang/Sema/Sema.h | 6 ++++++ clang/lib/Sema/SemaExpr.cpp | 19 ++++++++++++++++-- ...compatible-function-pointer-types-strict.c | 20 +++++++++++++++++++ 5 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 clang/test/Sema/incompatible-function-pointer-types-strict.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 72b9a52c7cd0d..2ce5fd48ca13a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -366,11 +366,14 @@ Improvements to Clang's diagnostics - Clang now correctly points to the problematic parameter for the ``-Wnonnull`` warning. This fixes `Issue 58273 `_. -- Introduced ``-Wcast-function-type-strict`` to warn about function type mismatches - in casts that may result in runtime indirect call `Control-Flow Integrity (CFI) - `_ failures. This diagnostic - is grouped under ``-Wcast-function-type`` as it identifies a more strict set of - potentially problematic function type casts. +- Introduced ``-Wcast-function-type-strict`` and + ``-Wincompatible-function-pointer-types-strict`` to warn about function type + mismatches in casts and assignments that may result in runtime indirect call + `Control-Flow Integrity (CFI) + `_ failures. The + ``-Wcast-function-type-strict`` diagnostic is grouped under + ``-Wcast-function-type`` as it identifies a more strict set of potentially + problematic function type casts. - Clang will now disambiguate NTTP types when printing diagnostic that contain NTTP types. Fixes `Issue 57562 `_. - Better error recovery for pack expansion of expressions. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 1b1db765fa7a9..eea38a4cab8a3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8214,6 +8214,9 @@ def err_typecheck_convert_incompatible_function_pointer : Error< def ext_typecheck_convert_incompatible_function_pointer : ExtWarn< err_typecheck_convert_incompatible_function_pointer.Text>, InGroup, DefaultError; +def warn_typecheck_convert_incompatible_function_pointer_strict : Warning< + err_typecheck_convert_incompatible_function_pointer.Text>, + InGroup>, DefaultIgnore; def ext_typecheck_convert_discards_qualifiers : ExtWarn< "%select{%diff{assigning to $ from $|assigning to different types}0,1" "|%diff{passing $ to parameter of type $|" diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e8c9cb966bae7..25d9d2e0c3baa 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12228,6 +12228,12 @@ class Sema final { /// extension. IncompatibleFunctionPointer, + /// IncompatibleFunctionPointerStrict - The assignment is between two + /// function pointer types that are not identical, but are compatible, + /// unless compiled with -fsanitize=cfi, in which case the type mismatch + /// may trip an indirect call runtime check. + IncompatibleFunctionPointerStrict, + /// IncompatiblePointerSign - The assignment is between two pointers types /// which point to integers which have a different sign, but are otherwise /// identical. 
This is a subset of the above, but broken out because it's by diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index c40bcb083907b..1ba88ad6cc2a6 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9240,7 +9240,8 @@ static bool IsInvalidCmseNSCallConversion(Sema &S, QualType FromType, // This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3]. // FIXME: add a couple examples in this comment. static Sema::AssignConvertType -checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { +checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType, + SourceLocation Loc) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); @@ -9309,6 +9310,13 @@ checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { return Sema::FunctionVoidPointer; } + if (!S.Diags.isIgnored( + diag::warn_typecheck_convert_incompatible_function_pointer_strict, + Loc) && + RHSType->isFunctionPointerType() && LHSType->isFunctionPointerType() && + !S.IsFunctionConversion(RHSType, LHSType, RHSType)) + return Sema::IncompatibleFunctionPointerStrict; + // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or // unqualified versions of compatible types, ... 
QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0); @@ -9660,7 +9668,8 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, Kind = CK_NoOp; else Kind = CK_BitCast; - return checkPointerTypesForAssignment(*this, LHSType, RHSType); + return checkPointerTypesForAssignment(*this, LHSType, RHSType, + RHS.get()->getBeginLoc()); } // int -> T* @@ -16949,6 +16958,12 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy, ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; + case IncompatibleFunctionPointerStrict: + DiagKind = + diag::warn_typecheck_convert_incompatible_function_pointer_strict; + ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); + MayHaveConvFixit = true; + break; case IncompatibleFunctionPointer: if (getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_incompatible_function_pointer; diff --git a/clang/test/Sema/incompatible-function-pointer-types-strict.c b/clang/test/Sema/incompatible-function-pointer-types-strict.c new file mode 100644 index 0000000000000..647251de42030 --- /dev/null +++ b/clang/test/Sema/incompatible-function-pointer-types-strict.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -fsyntax-only %s -Wincompatible-function-pointer-types-strict -verify=soft,strict +// RUN: %clang_cc1 -fsyntax-only %s -Werror=incompatible-function-pointer-types-strict -verify=hard,strict +// RUN: %clang_cc1 -fsyntax-only %s -Wincompatible-function-pointer-types -verify=nonstrict +// nonstrict-no-diagnostics + +enum E { A = -1, B }; +typedef enum E (*fn_a_t)(void); +typedef void (*fn_b_t)(void); + +int a(void) { return 0; } +void __attribute__((noreturn)) b(void) { while (1); } + +void fa(fn_a_t x) {} // strict-note {{passing argument to parameter 'x' here}} +void fb(fn_b_t x) {} + +void baz(void) { + fa(&a); // soft-warning {{incompatible function pointer types passing 'int (*)(void)' to parameter of type 'fn_a_t' (aka 'enum E (*)(void)')}} \ + hard-error 
{{incompatible function pointer types passing 'int (*)(void)' to parameter of type 'fn_a_t' (aka 'enum E (*)(void)')}} + fb(&b); // no-warning +} From d29d5ffb6332569e85d5eda5130603bbd8664635 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Mon, 7 Nov 2022 14:57:43 -0800 Subject: [PATCH 481/516] Revert "[Assignment Tracking][5.1/*] Add deleteAssignmentMarkers function" This reverts commit 4c44fa1c3829c2d0c6ce10b576dafbc2e0631d47. This patch has to be reverted because I need to revert 171f7024cc82e8702abebdedb699d37b50574be7 and without reverting this patch, reverting 171f7024cc82e8702abebdedb699d37b50574be7 causes conflicts. Patch 171f7024cc82e8702abebdedb699d37b50574be7 introduced a cyclic dependency in the module build. https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/48197/consoleFull#-69937453049ba4694-19c4-4d7e-bec5-911270d8a58c In file included from :1: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/Argument.h:18:10: fatal error: cyclic dependency in module 'LLVM_IR': LLVM_IR -> LLVM_intrinsic_gen -> LLVM_IR ^ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: While building module 'LLVM_IR' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57: In file included from :12: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/DebugInfo.h:24:10: fatal error: could not build module 'LLVM_intrinsic_gen' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: In file included from :15: In file included from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCContext.h:23: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57:10: fatal error: 
could not build module 'LLVM_IR' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~ /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14:10: fatal error: could not build module 'LLVM_MC' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ 4 errors generated. --- llvm/include/llvm/IR/DebugInfo.h | 3 --- llvm/lib/IR/DebugInfo.cpp | 9 --------- llvm/unittests/IR/DebugInfoTest.cpp | 22 +--------------------- 3 files changed, 1 insertion(+), 33 deletions(-) diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 02f4da19c3463..f521228445464 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -216,9 +216,6 @@ inline AssignmentMarkerRange getAssignmentMarkers(const Instruction *Inst) { return make_range(Value::user_iterator(), Value::user_iterator()); } -/// Delete the llvm.dbg.assign intrinsics linked to \p Inst. -void deleteAssignmentMarkers(const Instruction *Inst); - /// Replace all uses (and attachments) of \p Old with \p New. void RAUW(DIAssignID *Old, DIAssignID *New); diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 64d606ec15a60..bfa8bc7a935e9 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1673,15 +1673,6 @@ AssignmentMarkerRange at::getAssignmentMarkers(DIAssignID *ID) { return make_range(IDAsValue->user_begin(), IDAsValue->user_end()); } -void at::deleteAssignmentMarkers(const Instruction *Inst) { - auto Range = getAssignmentMarkers(Inst); - if (Range.empty()) - return; - SmallVector ToDelete(Range.begin(), Range.end()); - for (auto *DAI : ToDelete) - DAI->eraseFromParent(); -} - void at::RAUW(DIAssignID *Old, DIAssignID *New) { // Replace MetadataAsValue uses. 
if (auto *OldIDAsValue = diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index e58b4f562e591..7cdd3ae2bb849 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -398,13 +398,6 @@ TEST(AssignmentTrackingTest, Utils) { ret void, !dbg !19 } - define dso_local void @fun3() !dbg !21 { - entry: - %local = alloca i32, align 4, !DIAssignID !24 - call void @llvm.dbg.assign(metadata i32 undef, metadata !22, metadata !DIExpression(), metadata !24, metadata i32* undef, metadata !DIExpression()), !dbg !23 - ret void - } - declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) !llvm.dbg.cu = !{!0} @@ -432,10 +425,6 @@ TEST(AssignmentTrackingTest, Utils) { !18 = !DILocalVariable(name: "local2", scope: !17, file: !1, line: 2, type: !11) !19 = !DILocation(line: 4, column: 1, scope: !17) !20 = distinct !DIAssignID() - !21 = distinct !DISubprogram(name: "fun3", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) - !22 = !DILocalVariable(name: "local4", scope: !21, file: !1, line: 2, type: !11) - !23 = !DILocation(line: 4, column: 1, scope: !21) - !24 = distinct !DIAssignID() )"); // Check the test IR isn't malformed. @@ -494,16 +483,7 @@ TEST(AssignmentTrackingTest, Utils) { ASSERT_TRUE(std::distance(Fun2Insts.begin(), Fun2Insts.end()) == 1); EXPECT_EQ(*Fun2Insts.begin(), &Fun2Alloca); - // 3. Check that deleting dbg.assigns from a specific instruction works. - Instruction &Fun3Alloca = - *M->getFunction("fun3")->getEntryBlock().getFirstNonPHIOrDbg(); - auto Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); - ASSERT_TRUE(std::distance(Fun3Markers.begin(), Fun3Markers.end()) == 1); - at::deleteAssignmentMarkers(&Fun3Alloca); - Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); - EXPECT_EQ(Fun3Markers.empty(), true); - - // 4. Check that deleting works and applies only to the target function. + // 3. 
Check that deleting works and applies only to the target function. at::deleteAll(&Fun1); // There should now only be the alloca and ret in fun1. EXPECT_EQ(Fun1.begin()->size(), 2u); From 4c37a413e582de06b8d3ecd233588fc341ce95e0 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Mon, 7 Nov 2022 14:59:03 -0800 Subject: [PATCH 482/516] Revert "Fix warning: comparison of integers of different signs" This reverts commit 028df7fab11bd8c26d8f5689e049186eb8b39092. I am reverting this patch because I need to revert 171f7024cc82e8702abebdedb699d37b50574be7 and without reverting this patch, reverting 171f7024cc82e8702abebdedb699d37b50574be7 causes conflicts. Patch 171f7024cc82e8702abebdedb699d37b50574be7 introduced a cyclic dependency in the module build. https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/48197/consoleFull#-69937453049ba4694-19c4-4d7e-bec5-911270d8a58c In file included from :1: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/Argument.h:18:10: fatal error: cyclic dependency in module 'LLVM_IR': LLVM_IR -> LLVM_intrinsic_gen -> LLVM_IR ^ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: While building module 'LLVM_IR' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57: In file included from :12: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/DebugInfo.h:24:10: fatal error: could not build module 'LLVM_intrinsic_gen' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: In file included from :15: In file included from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCContext.h:23:
/Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57:10: fatal error: could not build module 'LLVM_IR' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~ /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14:10: fatal error: could not build module 'LLVM_MC' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ 4 errors generated. --- llvm/unittests/IR/DebugInfoTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index 7cdd3ae2bb849..9888bb6dd8e50 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -486,7 +486,7 @@ TEST(AssignmentTrackingTest, Utils) { // 3. Check that deleting works and applies only to the target function. at::deleteAll(&Fun1); // There should now only be the alloca and ret in fun1. - EXPECT_EQ(Fun1.begin()->size(), 2u); + EXPECT_EQ(Fun1.begin()->size(), 2); // fun2's alloca should have the same DIAssignID and remain linked to its // llvm.dbg.assign. EXPECT_EQ(Fun2ID, cast_or_null( From 41f5a0004e442ae71c8e754fdadb4bd1e172fb2d Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Mon, 7 Nov 2022 15:04:16 -0800 Subject: [PATCH 483/516] Revert "[Assignment Tracking][5/*] Add core infrastructure for instruction reference" This reverts commit 171f7024cc82e8702abebdedb699d37b50574be7. 
Reverting this patch because it causes a cyclic dependency in the module build https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/48197/consoleFull#-69937453049ba4694-19c4-4d7e-bec5-911270d8a58c In file included from :1: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/Argument.h:18:10: fatal error: cyclic dependency in module 'LLVM_IR': LLVM_IR -> LLVM_intrinsic_gen -> LLVM_IR ^ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: While building module 'LLVM_IR' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57: In file included from :12: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/DebugInfo.h:24:10: fatal error: could not build module 'LLVM_intrinsic_gen' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: In file included from :15: In file included from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCContext.h:23: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57:10: fatal error: could not build module 'LLVM_IR' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~ /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14:10: fatal error: could not build module 'LLVM_MC' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ 4 errors generated. 
--- llvm/include/llvm/AsmParser/LLParser.h | 6 - llvm/include/llvm/IR/DebugInfo.h | 63 +-------- llvm/include/llvm/IR/Instruction.h | 4 - llvm/lib/AsmParser/LLParser.cpp | 19 +-- llvm/lib/IR/DebugInfo.cpp | 60 --------- llvm/lib/IR/Instruction.cpp | 4 - llvm/lib/IR/LLVMContextImpl.h | 5 - llvm/lib/IR/Metadata.cpp | 41 ------ llvm/lib/IR/Verifier.cpp | 9 -- .../parse-and-verify/verify.ll | 8 -- llvm/unittests/IR/DebugInfoTest.cpp | 126 ------------------ 11 files changed, 3 insertions(+), 342 deletions(-) diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index e9813c34ce373..8757543071559 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -108,12 +108,6 @@ namespace llvm { SmallVector InstsWithTBAATag; - /// DIAssignID metadata does not support temporary RAUW so we cannot use - /// the normal metadata forward reference resolution method. Instead, - /// non-temporary DIAssignID are attached to instructions (recorded here) - /// then replaced later. - DenseMap> TempDIAssignIDAttachments; - // Type resolution handling data structures. The location is set when we // have processed a use of the type but not a definition yet. StringMap > NamedTypes; diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index f521228445464..705a2b2e86e68 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -21,7 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/DebugInfoMetadata.h" namespace llvm { @@ -163,67 +163,6 @@ class DebugInfoFinder { SmallPtrSet NodesSeen; }; -/// Assignment Tracking (at). -namespace at { -// -// Utilities for enumerating storing instructions from an assignment ID. -// -/// A range of instructions. 
-using AssignmentInstRange = - iterator_range::iterator>; -/// Return a range of instructions (typically just one) that have \p ID -/// as an attachment. -/// Iterators invalidated by adding or removing DIAssignID metadata to/from any -/// instruction (including by deleting or cloning instructions). -AssignmentInstRange getAssignmentInsts(DIAssignID *ID); -/// Return a range of instructions (typically just one) that perform the -/// assignment that \p DAI encodes. -/// Iterators invalidated by adding or removing DIAssignID metadata to/from any -/// instruction (including by deleting or cloning instructions). -inline AssignmentInstRange getAssignmentInsts(const DbgAssignIntrinsic *DAI) { - return getAssignmentInsts(cast(DAI->getAssignID())); -} - -// -// Utilities for enumerating llvm.dbg.assign intrinsic from an assignment ID. -// -/// High level: this is an iterator for llvm.dbg.assign intrinsics. -/// Implementation details: this is a wrapper around Value's User iterator that -/// dereferences to a DbgAssignIntrinsic ptr rather than a User ptr. -class DbgAssignIt - : public iterator_adaptor_base::iterator_category, - DbgAssignIntrinsic *, std::ptrdiff_t, - DbgAssignIntrinsic **, - DbgAssignIntrinsic *&> { -public: - DbgAssignIt(Value::user_iterator It) : iterator_adaptor_base(It) {} - DbgAssignIntrinsic *operator*() const { return cast(*I); } -}; -/// A range of llvm.dbg.assign intrinsics. -using AssignmentMarkerRange = iterator_range; -/// Return a range of dbg.assign intrinsics which use \ID as an operand. -/// Iterators invalidated by deleting an intrinsic contained in this range. -AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID); -/// Return a range of dbg.assign intrinsics for which \p Inst performs the -/// assignment they encode. -/// Iterators invalidated by deleting an intrinsic contained in this range. 
-inline AssignmentMarkerRange getAssignmentMarkers(const Instruction *Inst) { - if (auto *ID = Inst->getMetadata(LLVMContext::MD_DIAssignID)) - return getAssignmentMarkers(cast(ID)); - else - return make_range(Value::user_iterator(), Value::user_iterator()); -} - -/// Replace all uses (and attachments) of \p Old with \p New. -void RAUW(DIAssignID *Old, DIAssignID *New); - -/// Remove all Assignment Tracking related intrinsics and metadata from \p F. -void deleteAll(Function *F); - -} // end namespace at - /// Return true if assignment tracking is enabled. bool getEnableAssignmentTracking(); } // end namespace llvm diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 131a7414a1a7d..f85fcb93068fa 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -515,10 +515,6 @@ class Instruction : public User, void getAllMetadataImpl(SmallVectorImpl> &) const; - /// Update the LLVMContext ID-to-Instruction(s) mapping. If \p ID is nullptr - /// then clear the mapping for this instruction. - void updateDIAssignIDMapping(DIAssignID *ID); - public: //===--------------------------------------------------------------------===// // Predicates and helper methods. diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index c1835b3e3023c..25204847ca9ce 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -853,18 +853,7 @@ bool LLParser::parseStandaloneMetadata() { // See if this was forward referenced, if so, handle it. auto FI = ForwardRefMDNodes.find(MetadataID); if (FI != ForwardRefMDNodes.end()) { - auto *ToReplace = FI->second.first.get(); - // DIAssignID has its own special forward-reference "replacement" for - // attachments (the temporary attachments are never actually attached). 
- if (isa(Init)) { - for (auto *Inst : TempDIAssignIDAttachments[ToReplace]) { - assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID) && - "Inst unexpectedly already has DIAssignID attachment"); - Inst->setMetadata(LLVMContext::MD_DIAssignID, Init); - } - } - - ToReplace->replaceAllUsesWith(Init); + FI->second.first->replaceAllUsesWith(Init); ForwardRefMDNodes.erase(FI); assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work"); @@ -2093,11 +2082,7 @@ bool LLParser::parseInstructionMetadata(Instruction &Inst) { if (parseMetadataAttachment(MDK, N)) return true; - if (MDK == LLVMContext::MD_DIAssignID) - TempDIAssignIDAttachments[N].push_back(&Inst); - else - Inst.setMetadata(MDK, N); - + Inst.setMetadata(MDK, N); if (MDK == LLVMContext::MD_tbaa) InstsWithTBAATag.push_back(&Inst); diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index bfa8bc7a935e9..fa11a222bcf5c 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm-c/DebugInfo.h" -#include "LLVMContextImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -38,7 +37,6 @@ #include using namespace llvm; -using namespace llvm::at; using namespace llvm::dwarf; static cl::opt @@ -1646,61 +1644,3 @@ LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) { return (LLVMMetadataKind)LLVMGenericDINodeMetadataKind; } } - -AssignmentInstRange at::getAssignmentInsts(DIAssignID *ID) { - assert(ID && "Expected non-null ID"); - LLVMContext &Ctx = ID->getContext(); - auto &Map = Ctx.pImpl->AssignmentIDToInstrs; - - auto MapIt = Map.find(ID); - if (MapIt == Map.end()) - return make_range(nullptr, nullptr); - - return make_range(MapIt->second.begin(), MapIt->second.end()); -} - -AssignmentMarkerRange at::getAssignmentMarkers(DIAssignID *ID) { - assert(ID && "Expected non-null ID"); - LLVMContext &Ctx = 
ID->getContext(); - - auto *IDAsValue = MetadataAsValue::getIfExists(Ctx, ID); - - // The ID is only used wrapped in MetadataAsValue(ID), so lets check that - // one of those already exists first. - if (!IDAsValue) - return make_range(Value::user_iterator(), Value::user_iterator()); - - return make_range(IDAsValue->user_begin(), IDAsValue->user_end()); -} - -void at::RAUW(DIAssignID *Old, DIAssignID *New) { - // Replace MetadataAsValue uses. - if (auto *OldIDAsValue = - MetadataAsValue::getIfExists(Old->getContext(), Old)) { - auto *NewIDAsValue = MetadataAsValue::get(Old->getContext(), New); - OldIDAsValue->replaceAllUsesWith(NewIDAsValue); - } - - // Replace attachments. - AssignmentInstRange InstRange = getAssignmentInsts(Old); - // Use intermediate storage for the instruction ptrs because the - // getAssignmentInsts range iterators will be invalidated by adding and - // removing DIAssignID attachments. - SmallVector InstVec(InstRange.begin(), InstRange.end()); - for (auto *I : InstVec) - I->setMetadata(LLVMContext::MD_DIAssignID, New); -} - -void at::deleteAll(Function *F) { - SmallVector ToDelete; - for (BasicBlock &BB : *F) { - for (Instruction &I : BB) { - if (auto *DAI = dyn_cast(&I)) - ToDelete.push_back(DAI); - else - I.setMetadata(LLVMContext::MD_DIAssignID, nullptr); - } - } - for (auto *DAI : ToDelete) - DAI->eraseFromParent(); -} diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 74fc3416b564f..007e518a1a817 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -55,10 +55,6 @@ Instruction::~Instruction() { // instructions in a BasicBlock are deleted). if (isUsedByMetadata()) ValueAsMetadata::handleRAUW(this, UndefValue::get(getType())); - - // Explicitly remove DIAssignID metadata to clear up ID -> Instruction(s) - // mapping in LLVMContext. 
- setMetadata(LLVMContext::MD_DIAssignID, nullptr); } diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 3f4f222a0720e..0b1e5194222fc 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1499,11 +1499,6 @@ class LLVMContextImpl { /// Collection of metadata used in this context. DenseMap ValueMetadata; - /// Map DIAssignID -> Instructions with that attachment. - /// Managed by Instruction via Instruction::updateDIAssignIDMapping. - /// Query using the at:: functions defined in DebugInfo.h. - DenseMap> AssignmentIDToInstrs; - /// Collection of per-GlobalObject sections used in this context. DenseMap GlobalObjectSections; diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 5336902031898..052f3b1b37ded 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -1425,37 +1425,6 @@ void Instruction::dropUnknownNonDebugMetadata(ArrayRef KnownIDs) { } } -void Instruction::updateDIAssignIDMapping(DIAssignID *ID) { - auto &IDToInstrs = getContext().pImpl->AssignmentIDToInstrs; - if (const DIAssignID *CurrentID = - cast_or_null(getMetadata(LLVMContext::MD_DIAssignID))) { - // Nothing to do if the ID isn't changing. - if (ID == CurrentID) - return; - - // Unmap this instruction from its current ID. - auto InstrsIt = IDToInstrs.find(CurrentID); - assert(InstrsIt != IDToInstrs.end() && - "Expect existing attachment to be mapped"); - - auto &InstVec = InstrsIt->second; - auto *InstIt = std::find(InstVec.begin(), InstVec.end(), this); - assert(InstIt != InstVec.end() && - "Expect instruction to be mapped to attachment"); - // The vector contains a ptr to this. If this is the only element in the - // vector, remove the ID:vector entry, otherwise just remove the - // instruction from the vector. - if (InstVec.size() == 1) - IDToInstrs.erase(InstrsIt); - else - InstVec.erase(InstIt); - } - - // Map this instruction to the new ID. 
- if (ID) - IDToInstrs[ID].push_back(this); -} - void Instruction::setMetadata(unsigned KindID, MDNode *Node) { if (!Node && !hasMetadata()) return; @@ -1466,16 +1435,6 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { return; } - // Update DIAssignID to Instruction(s) mapping. - if (KindID == LLVMContext::MD_DIAssignID) { - // The DIAssignID tracking infrastructure doesn't support RAUWing temporary - // nodes with DIAssignIDs. The cast_or_null below would also catch this, but - // having a dedicated assert helps make this obvious. - assert((!Node || !Node->isTemporary()) && - "Temporary DIAssignIDs are invalid"); - updateDIAssignIDMapping(cast_or_null(Node)); - } - Value::setMetadata(KindID, Node); } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 002b5210830fe..ab6730c578a7b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -68,7 +68,6 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -4558,10 +4557,6 @@ void Verifier::visitDIAssignIDMetadata(Instruction &I, MDNode *MD) { CheckDI(isa(User), "!DIAssignID should only be used by llvm.dbg.assign intrinsics", MD, User); - // All of the dbg.assign intrinsics should be in the same function as I. - if (auto *DAI = dyn_cast(User)) - CheckDI(DAI->getFunction() == I.getFunction(), - "dbg.assign not in same function as inst", DAI, &I); } } } @@ -6022,10 +6017,6 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { CheckDI(isa(DAI->getRawAddressExpression()), "invalid llvm.dbg.assign intrinsic address expression", &DII, DAI->getRawAddressExpression()); - // All of the linked instructions should be in the same function as DII. 
- for (Instruction *I : at::getAssignmentInsts(DAI)) - CheckDI(DAI->getFunction() == I->getFunction(), - "inst not in same function as dbg.assign", I, DAI); } // Ignore broken !dbg attachments; they're checked elsewhere. diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll index 9fa17e7f06ee1..577289604d536 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll @@ -6,13 +6,6 @@ ;; ;; Checks for this one are inline. -define dso_local void @fun2() !dbg !15 { - ;; DIAssignID copied here from @fun() where it is used by intrinsics. - ; CHECK: dbg.assign not in same function as inst - %x = alloca i32, align 4, !DIAssignID !14 - ret void -} - define dso_local void @fun() !dbg !7 { entry: %a = alloca i32, align 4, !DIAssignID !14 @@ -57,4 +50,3 @@ declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !13 = !DILocation(line: 1, column: 1, scope: !7) !14 = distinct !DIAssignID() -!15 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index 9888bb6dd8e50..524752168b091 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -368,130 +368,4 @@ TEST(DIBuilder, createDbgAddr) { EXPECT_EQ(MDExp->getNumElements(), 0u); } -TEST(AssignmentTrackingTest, Utils) { - // Test the assignment tracking utils defined in DebugInfo.h namespace at {}. - // This includes: - // getAssignmentInsts - // getAssignmentMarkers - // RAUW - // deleteAll - // - // The input IR includes two functions, fun1 and fun2. Both contain an alloca - // with a DIAssignID tag. 
fun1's alloca is linked to two llvm.dbg.assign - // intrinsics, one of which is for an inlined variable and appears before the - // alloca. - - LLVMContext C; - std::unique_ptr M = parseIR(C, R"( - define dso_local void @fun1() !dbg !7 { - entry: - call void @llvm.dbg.assign(metadata i32 undef, metadata !10, metadata !DIExpression(), metadata !12, metadata i32 undef, metadata !DIExpression()), !dbg !13 - %local = alloca i32, align 4, !DIAssignID !12 - call void @llvm.dbg.assign(metadata i32 undef, metadata !16, metadata !DIExpression(), metadata !12, metadata i32 undef, metadata !DIExpression()), !dbg !15 - ret void, !dbg !15 - } - - define dso_local void @fun2() !dbg !17 { - entry: - %local = alloca i32, align 4, !DIAssignID !20 - call void @llvm.dbg.assign(metadata i32 undef, metadata !18, metadata !DIExpression(), metadata !20, metadata i32 undef, metadata !DIExpression()), !dbg !19 - ret void, !dbg !19 - } - - declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!3, !4, !5} - !llvm.ident = !{!6} - - !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) - !1 = !DIFile(filename: "test.c", directory: "/") - !2 = !{} - !3 = !{i32 7, !"Dwarf Version", i32 4} - !4 = !{i32 2, !"Debug Info Version", i32 3} - !5 = !{i32 1, !"wchar_size", i32 4} - !6 = !{!"clang version 14.0.0"} - !7 = distinct !DISubprogram(name: "fun1", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) - !8 = !DISubroutineType(types: !9) - !9 = !{null} - !10 = !DILocalVariable(name: "local3", scope: !14, file: !1, line: 2, type: !11) - !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !12 = distinct !DIAssignID() - !13 = !DILocation(line: 5, column: 1, scope: !14, inlinedAt: 
!15) - !14 = distinct !DISubprogram(name: "inline", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) - !15 = !DILocation(line: 3, column: 1, scope: !7) - !16 = !DILocalVariable(name: "local1", scope: !7, file: !1, line: 2, type: !11) - !17 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) - !18 = !DILocalVariable(name: "local2", scope: !17, file: !1, line: 2, type: !11) - !19 = !DILocation(line: 4, column: 1, scope: !17) - !20 = distinct !DIAssignID() - )"); - - // Check the test IR isn't malformed. - ASSERT_TRUE(M); - - Function &Fun1 = *M->getFunction("fun1"); - Instruction &Alloca = *Fun1.getEntryBlock().getFirstNonPHIOrDbg(); - - // 1. Check the Instruction <-> Intrinsic mappings work in fun1. - // - // Check there are two llvm.dbg.assign intrinsics linked to Alloca. - auto CheckFun1Mapping = [&Alloca]() { - auto Markers = at::getAssignmentMarkers(&Alloca); - EXPECT_TRUE(std::distance(Markers.begin(), Markers.end()) == 2); - // Check those two entries are distinct. - DbgAssignIntrinsic *First = *Markers.begin(); - DbgAssignIntrinsic *Second = *std::next(Markers.begin()); - EXPECT_NE(First, Second); - - // Check that we can get back to Alloca from each llvm.dbg.assign. - for (auto *DAI : Markers) { - auto Insts = at::getAssignmentInsts(DAI); - // Check there is exactly one instruction linked to each intrinsic. Use - // ASSERT_TRUE because we're going to dereference the begin iterator. - ASSERT_TRUE(std::distance(Insts.begin(), Insts.end()) == 1); - EXPECT_FALSE(Insts.empty()); - // Check the linked instruction is Alloca. - Instruction *LinkedInst = *Insts.begin(); - EXPECT_EQ(LinkedInst, &Alloca); - } - }; - CheckFun1Mapping(); - - // 2. Check DIAssignID RAUW replaces attachments and uses. 
- // - DIAssignID *Old = - cast_or_null(Alloca.getMetadata(LLVMContext::MD_DIAssignID)); - DIAssignID *New = DIAssignID::getDistinct(C); - ASSERT_TRUE(Old && New && New != Old); - at::RAUW(Old, New); - // Check fun1's alloca and intrinsics have been updated and the mapping still - // works. - EXPECT_EQ(New, cast_or_null( - Alloca.getMetadata(LLVMContext::MD_DIAssignID))); - CheckFun1Mapping(); - - // Check that fun2's alloca and intrinsic have not not been updated. - Instruction &Fun2Alloca = - *M->getFunction("fun2")->getEntryBlock().getFirstNonPHIOrDbg(); - DIAssignID *Fun2ID = cast_or_null( - Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID)); - EXPECT_NE(New, Fun2ID); - auto Fun2Markers = at::getAssignmentMarkers(&Fun2Alloca); - ASSERT_TRUE(std::distance(Fun2Markers.begin(), Fun2Markers.end()) == 1); - auto Fun2Insts = at::getAssignmentInsts(*Fun2Markers.begin()); - ASSERT_TRUE(std::distance(Fun2Insts.begin(), Fun2Insts.end()) == 1); - EXPECT_EQ(*Fun2Insts.begin(), &Fun2Alloca); - - // 3. Check that deleting works and applies only to the target function. - at::deleteAll(&Fun1); - // There should now only be the alloca and ret in fun1. - EXPECT_EQ(Fun1.begin()->size(), 2); - // fun2's alloca should have the same DIAssignID and remain linked to its - // llvm.dbg.assign. - EXPECT_EQ(Fun2ID, cast_or_null( - Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID))); - EXPECT_FALSE(at::getAssignmentMarkers(&Fun2Alloca).empty()); -} - } // end namespace From 604027519e937d53fc61f339f578f2c4d37067f0 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Mon, 7 Nov 2022 15:13:09 -0800 Subject: [PATCH 484/516] [mlir:SubElementInterfaces] Remove unnecessary static check We already constrain the template on just attributes/types. 
--- mlir/include/mlir/IR/SubElementInterfaces.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/include/mlir/IR/SubElementInterfaces.h b/mlir/include/mlir/IR/SubElementInterfaces.h index 07d246aafbfa7..2af7642e93b25 100644 --- a/mlir/include/mlir/IR/SubElementInterfaces.h +++ b/mlir/include/mlir/IR/SubElementInterfaces.h @@ -138,8 +138,7 @@ struct AttrTypeSubElementHandler< return T(); if constexpr (std::is_base_of_v) { return cast(attrRepls.take_front(1)[0]); - } else if constexpr (!detail::IsInterface::value && - std::is_base_of_v) { + } else { return cast(typeRepls.take_front(1)[0]); } } From b22d80dc6a6af6328d68f7b944627f9278ff6ffb Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Mon, 7 Nov 2022 15:18:20 -0800 Subject: [PATCH 485/516] Revert "[NFC] Move getDebugValueLoc from static in Local.cpp to DebugInfo.h" This reverts commit 80378a4ca725eeeae940b99220b3913f7b73c895. I am reverting this patch because I need to revert 171f7024cc82e8702abebdedb699d37b50574be7 and without reverting this patch, reverting 171f7024cc82e8702abebdedb699d37b50574be7 causes conflicts. Patch 171f7024cc82e8702abebdedb699d37b50574be7 introduced a cyclic dependency in the module build.
https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/48197/consoleFull#-69937453049ba4694-19c4-4d7e-bec5-911270d8a58c In file included from :1: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/Argument.h:18:10: fatal error: cyclic dependency in module 'LLVM_IR': LLVM_IR -> LLVM_intrinsic_gen -> LLVM_IR ^ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: While building module 'LLVM_IR' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57: In file included from :12: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/IR/DebugInfo.h:24:10: fatal error: could not build module 'LLVM_intrinsic_gen' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ While building module 'LLVM_MC' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14: In file included from :15: In file included from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCContext.h:23: /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/MC/MCPseudoProbe.h:57:10: fatal error: could not build module 'LLVM_IR' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~ /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/lib/MC/MCAsmInfoCOFF.cpp:14:10: fatal error: could not build module 'LLVM_MC' ~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ 4 errors generated. 
--- llvm/include/llvm/IR/DebugInfo.h | 4 ---- llvm/lib/IR/DebugInfo.cpp | 12 ------------ llvm/lib/Transforms/Utils/Local.cpp | 21 +++++++++++++++++---- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 705a2b2e86e68..b35d447a7c891 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -49,10 +49,6 @@ void findDbgUsers(SmallVectorImpl &DbgInsts, Value *V); /// Find subprogram that is enclosing this scope. DISubprogram *getDISubprogram(const MDNode *Scope); -/// Produce a DebugLoc to use for each dbg.declare that is promoted to a -/// dbg.value. -DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII); - /// Strip debug info in the module if it exists. /// /// To do this, we remove all calls to the debugger intrinsics and any named diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index fa11a222bcf5c..d30fca63067c0 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -138,18 +138,6 @@ DISubprogram *llvm::getDISubprogram(const MDNode *Scope) { return nullptr; } -DebugLoc llvm::getDebugValueLoc(DbgVariableIntrinsic *DII) { - // Original dbg.declare must have a location. - const DebugLoc &DeclareLoc = DII->getDebugLoc(); - MDNode *Scope = DeclareLoc.getScope(); - DILocation *InlinedAt = DeclareLoc.getInlinedAt(); - // Because no machine insts can come from debug intrinsics, only the scope - // and inlinedAt is significant. Zero line numbers are used in case this - // DebugLoc leaks into any adjacent instructions. Produce an unknown location - // with the correct scope / inlinedAt fields. - return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt); -} - //===----------------------------------------------------------------------===// // DebugInfoFinder implementations. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index e31e69130d671..e6b3b5cf159a1 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1495,6 +1495,19 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) { return false; } +/// Produce a DebugLoc to use for each dbg.declare/inst pair that are promoted +/// to a dbg.value. Because no machine insts can come from debug intrinsics, +/// only the scope and inlinedAt is significant. Zero line numbers are used in +/// case this DebugLoc leaks into any adjacent instructions. +static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) { + // Original dbg.declare must have a location. + const DebugLoc &DeclareLoc = DII->getDebugLoc(); + MDNode *Scope = DeclareLoc.getScope(); + DILocation *InlinedAt = DeclareLoc.getInlinedAt(); + // Produce an unknown location with the correct scope / inlinedAt fields. + return DILocation::get(DII->getContext(), 0, 0, Scope, InlinedAt); +} + /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, @@ -1505,7 +1518,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, auto *DIExpr = DII->getExpression(); Value *DV = SI->getValueOperand(); - DebugLoc NewLoc = getDebugValueLoc(DII); + DebugLoc NewLoc = getDebugValueLoc(DII, SI); if (!valueCoversEntireFragment(DV->getType(), DII)) { // FIXME: If storing to a part of the variable described by the dbg.declare, @@ -1540,7 +1553,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, return; } - DebugLoc NewLoc = getDebugValueLoc(DII); + DebugLoc NewLoc = getDebugValueLoc(DII, nullptr); // We are now tracking the loaded value instead of the address. 
In the // future if multi-location support is added to the IR, it might be @@ -1574,7 +1587,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII, BasicBlock *BB = APN->getParent(); auto InsertionPt = BB->getFirstInsertionPt(); - DebugLoc NewLoc = getDebugValueLoc(DII); + DebugLoc NewLoc = getDebugValueLoc(DII, nullptr); // The block may be a catchswitch block, which does not have a valid // insertion point. @@ -1646,7 +1659,7 @@ bool llvm::LowerDbgDeclare(Function &F) { // pointer to the variable. Insert a *value* intrinsic that describes // the variable by dereferencing the alloca. if (!CI->isLifetimeStartOrEnd()) { - DebugLoc NewLoc = getDebugValueLoc(DDI); + DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr); auto *DerefExpr = DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, From ec9aae9784fc7ee398490566926b625cd7cc4314 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 7 Nov 2022 23:22:09 +0000 Subject: [PATCH 486/516] [gn build] Port 428ac8f3a0f9 --- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index 535d87d9f12dd..b8e99c34ca3c2 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -112,12 +112,12 @@ static_library("clangd") { "Preamble.cpp", "Protocol.cpp", "Quality.cpp", - "QueryDriverDatabase.cpp", "RIFF.cpp", "Selection.cpp", "SemanticHighlighting.cpp", "SemanticSelection.cpp", "SourceCode.cpp", + "SystemIncludeExtractor.cpp", "TUScheduler.cpp", "TidyProvider.cpp", "URI.cpp", From 7c2d3153a9481793da58894dbf35d4994f3b67a4 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Mon, 7 Nov 2022 19:11:46 -0500 Subject: [PATCH 487/516] [mlir][spirv] Don't return value when cannot fold 
spirv.bitcast Returning a value would make the canonicalization infrastructure think that folding succeeded so the pattern will be tried again when invoked via, e.g., `applyPatternsAndFoldGreedily` and eventually fail due to not converging after 10 times by default. Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D137598 --- .../Dialect/SPIRV/IR/SPIRVCanonicalization.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp index 57e6475548642..b068d23f0e9f0 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp @@ -117,22 +117,22 @@ void spirv::AccessChainOp::getCanonicalizationPatterns( //===----------------------------------------------------------------------===// OpFoldResult spirv::BitcastOp::fold(ArrayRef /*operands*/) { - Value arg = getOperand(); - if (getType() == arg.getType()) - return arg; + Value curInput = getOperand(); + if (getType() == curInput.getType()) + return curInput; // Look through nested bitcasts. - if (auto bitcast = arg.getDefiningOp()) { - Value nestedArg = bitcast.getOperand(); - if (nestedArg.getType() == getType()) - return nestedArg; + if (auto prevCast = curInput.getDefiningOp()) { + Value prevInput = prevCast.getOperand(); + if (prevInput.getType() == getType()) + return prevInput; - getOperandMutable().assign(nestedArg); + getOperandMutable().assign(prevInput); return getResult(); } // TODO(kuhar): Consider constant-folding the operand attribute. 
- return getResult(); + return {}; } //===----------------------------------------------------------------------===// From 32a02a9c6bad489ce7c02f3f0b306a8fc1e67fd5 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Tue, 18 Oct 2022 15:20:49 -0700 Subject: [PATCH 488/516] [Debuginfod] DEBUGINFOD_HEADERS_FILE environment variable This change adds a DEBUGINFOD_HEADERS_FILE environment variable that provides a file containing HTTP headers to attach to outgoing HTTP requests, one per line. This allows a file permissioned with OS access control mechanisms to supply bearer credentials for Debuginfod requests. This matches the mechanism recently added to elfutils' libdebuginfod. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D136303 --- llvm/include/llvm/Debuginfod/HTTPClient.h | 1 + llvm/lib/Debuginfod/Debuginfod.cpp | 41 +++++++++++++++++++ llvm/lib/Debuginfod/HTTPClient.cpp | 6 +++ .../Inputs/capture_req.py | 23 +++++++++++ .../tools/llvm-debuginfod-find/Inputs/headers | 12 ++++++ .../tools/llvm-debuginfod-find/headers.test | 27 ++++++++++++ 6 files changed, 110 insertions(+) create mode 100644 llvm/test/tools/llvm-debuginfod-find/Inputs/capture_req.py create mode 100644 llvm/test/tools/llvm-debuginfod-find/Inputs/headers create mode 100644 llvm/test/tools/llvm-debuginfod-find/headers.test diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h index 6c94961032e75..1c9f719051eca 100644 --- a/llvm/include/llvm/Debuginfod/HTTPClient.h +++ b/llvm/include/llvm/Debuginfod/HTTPClient.h @@ -27,6 +27,7 @@ enum class HTTPMethod { GET }; /// A stateless description of an outbound HTTP request. 
struct HTTPRequest { SmallString<128> Url; + SmallVector Headers; HTTPMethod Method = HTTPMethod::GET; bool FollowRedirects = true; HTTPRequest(StringRef Url); diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp index ee5cc5141f74f..f20b5bc677e00 100644 --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -22,6 +22,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Debuginfod/Debuginfod.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -34,6 +35,7 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/xxhash.h" @@ -169,6 +171,44 @@ Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { return Error::success(); } +// An over-accepting simplification of the HTTP RFC 7230 spec. 
+static bool isHeader(StringRef S) { + StringRef Name; + StringRef Value; + std::tie(Name, Value) = S.split(':'); + if (Name.empty() || Value.empty()) + return false; + return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) && + all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; }); +} + +static SmallVector getHeaders() { + const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE"); + if (!Filename) + return {}; + ErrorOr> HeadersFile = + MemoryBuffer::getFile(Filename, /*IsText=*/true); + if (!HeadersFile) + return {}; + + SmallVector Headers; + uint64_t LineNumber = 0; + for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) { + LineNumber++; + if (!isHeader(Line)) { + if (!all_of(Line, llvm::isSpace)) + WithColor::warning() + << "could not parse debuginfod header: " << Filename << ':' + << LineNumber << '\n'; + continue; + } + if (Line.back() == '\r') + Line = Line.drop_back(); + Headers.emplace_back(Line); + } + return Headers; +} + Expected getCachedOrDownloadArtifact( StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, ArrayRef DebuginfodUrls, std::chrono::milliseconds Timeout) { @@ -214,6 +254,7 @@ Expected getCachedOrDownloadArtifact( StreamedHTTPResponseHandler Handler([&]() { return CacheAddStream(Task); }, Client); HTTPRequest Request(ArtifactUrl); + Request.Headers = getHeaders(); Error Err = Client.perform(Request, Handler); if (Err) return std::move(Err); diff --git a/llvm/lib/Debuginfod/HTTPClient.cpp b/llvm/lib/Debuginfod/HTTPClient.cpp index 3376eaa7cd0d2..f9201e4f96268 100644 --- a/llvm/lib/Debuginfod/HTTPClient.cpp +++ b/llvm/lib/Debuginfod/HTTPClient.cpp @@ -111,9 +111,15 @@ Error HTTPClient::perform(const HTTPRequest &Request, curl_easy_setopt(Curl, CURLOPT_URL, Url.c_str()); curl_easy_setopt(Curl, CURLOPT_FOLLOWLOCATION, Request.FollowRedirects); + curl_slist *Headers = nullptr; + for (const std::string &Header : Request.Headers) + Headers = curl_slist_append(Headers, 
Header.c_str()); + curl_easy_setopt(Curl, CURLOPT_HTTPHEADER, Headers); + CurlHTTPRequest CurlRequest(Handler); curl_easy_setopt(Curl, CURLOPT_WRITEDATA, &CurlRequest); CURLcode CurlRes = curl_easy_perform(Curl); + curl_slist_free_all(Headers); if (CurlRes != CURLE_OK) return joinErrors(std::move(CurlRequest.ErrorState), createStringError(errc::io_error, diff --git a/llvm/test/tools/llvm-debuginfod-find/Inputs/capture_req.py b/llvm/test/tools/llvm-debuginfod-find/Inputs/capture_req.py new file mode 100644 index 0000000000000..56fa2d08a0897 --- /dev/null +++ b/llvm/test/tools/llvm-debuginfod-find/Inputs/capture_req.py @@ -0,0 +1,23 @@ +import http.server +import os +import subprocess +import sys +import threading + +class TrivialHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(501) + + def log_request(self, *args, **kwargs): + print(self.requestline) + print(self.headers) + +httpd = http.server.HTTPServer(('', 0), TrivialHandler) +port = httpd.socket.getsockname()[1] + +try: + t = threading.Thread(target=httpd.serve_forever).start() + os.environ['DEBUGINFOD_URLS'] =f'http://localhost:{port}' + subprocess.run(sys.argv[1:], capture_output = True) +finally: + httpd.shutdown() diff --git a/llvm/test/tools/llvm-debuginfod-find/Inputs/headers b/llvm/test/tools/llvm-debuginfod-find/Inputs/headers new file mode 100644 index 0000000000000..9f66ac2821c09 --- /dev/null +++ b/llvm/test/tools/llvm-debuginfod-find/Inputs/headers @@ -0,0 +1,12 @@ + + +A: +:A +: +A :B + +A:B +C: D +E:F +hi!$: j k + diff --git a/llvm/test/tools/llvm-debuginfod-find/headers.test b/llvm/test/tools/llvm-debuginfod-find/headers.test new file mode 100644 index 0000000000000..6fe814db51799 --- /dev/null +++ b/llvm/test/tools/llvm-debuginfod-find/headers.test @@ -0,0 +1,27 @@ +REQUIRES: curl + +RUN: %python %S/Inputs/capture_req.py llvm-debuginfod-find --debuginfo 0 \ +RUN: | FileCheck --check-prefix NO-HEADERS %s +RUN: DEBUGINFOD_HEADERS_FILE=bad %python 
%S/Inputs/capture_req.py \ +RUN: llvm-debuginfod-find --debuginfo 0 \ +RUN: | FileCheck --check-prefix NO-HEADERS %s +RUN: DEBUGINFOD_HEADERS_FILE=%S/Inputs/headers %python %S/Inputs/capture_req.py \ +RUN: llvm-debuginfod-find --debuginfo 0 \ +RUN: | FileCheck --check-prefix HEADERS %s +RUN: DEBUGINFOD_HEADERS_FILE=%S/Inputs/headers DEBUGINFOD_URLS=fake not llvm-debuginfod-find --debuginfo 0 2>&1 \ +RUN: | FileCheck --check-prefix ERR -DHEADER_FILE=%S/Inputs/headers %s + +NO-HEADERS: Accept: */* +NO-HEADERS-NOT: {{.}} + +HEADERS: Accept: */* +HEADERS-NEXT: A: B +HEADERS-NEXT: C: D +HEADERS-NEXT: E: F +HEADERS-NEXT: hi!$: j k +HEADERS-NOT: {{.}} + +ERR: warning: could not parse debuginfod header: [[HEADER_FILE]]:3 +ERR-NEXT: warning: could not parse debuginfod header: [[HEADER_FILE]]:4 +ERR-NEXT: warning: could not parse debuginfod header: [[HEADER_FILE]]:5 +ERR-NEXT: warning: could not parse debuginfod header: [[HEADER_FILE]]:6 From ecedc4d71064b0d6fe1f5556b9bac37c6aa89bb3 Mon Sep 17 00:00:00 2001 From: Katherine Rasmussen Date: Tue, 1 Nov 2022 14:14:08 -0700 Subject: [PATCH 489/516] [flang] Add atomic_xor to list of intrinsics Add the atomic subroutine, atomic_xor, to the list of intrinsic subroutines, add its last dummy argument to a check for a coindexed-object, and update test. 
Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D137196 --- flang/docs/Intrinsics.md | 2 +- flang/lib/Evaluate/intrinsics.cpp | 13 +++++++++++-- flang/test/Semantics/atomic11.f90 | 20 ++++++++++++++------ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index baa0609c7ccaa..2af87f6adc84b 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -751,7 +751,7 @@ This phase currently supports all the intrinsic procedures listed above but the | Type inquiry intrinsic functions | BIT_SIZE, DIGITS, EPSILON, HUGE, KIND, MAXEXPONENT, MINEXPONENT, NEW_LINE, PRECISION, RADIX, RANGE, TINY| | Non-standard intrinsic functions | AND, OR, XOR, LSHIFT, RSHIFT, SHIFT, ZEXT, IZEXT, COSD, SIND, TAND, ACOSD, ASIND, ATAND, ATAN2D, COMPL, DCMPLX, EQV, NEQV, INT8, JINT, JNINT, KNINT, LOC, QCMPLX, DREAL, DFLOAT, QEXT, QFLOAT, QREAL, DNUM, NUM, JNUM, KNUM, QNUM, RNUM, RAN, RANF, ILEN, SIZEOF, MCLOCK, SECNDS, COTAN, IBCHNG, ISHA, ISHC, ISHL, IXOR, IARG, IARGC, NARGS, NUMARG, BADDRESS, IADDR, CACHESIZE, EOF, FP_CLASS, INT_PTR_KIND, ISNAN, MALLOC | | Intrinsic subroutines |MVBITS (elemental), CPU_TIME, DATE_AND_TIME, EVENT_QUERY, EXECUTE_COMMAND_LINE, GET_COMMAND, GET_COMMAND_ARGUMENT, GET_ENVIRONMENT_VARIABLE, MOVE_ALLOC, RANDOM_INIT, RANDOM_NUMBER, RANDOM_SEED, SYSTEM_CLOCK | -| Atomic intrinsic subroutines | ATOMIC_ADD &al. 
| +| Atomic intrinsic subroutines | ATOMIC_ADD | | Collective intrinsic subroutines | CO_REDUCE | diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 8841d5456045c..935586b118b17 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1232,6 +1232,14 @@ static const IntrinsicInterface intrinsicSubroutine[]{ {"stat", AnyInt, Rank::scalar, Optionality::optional, common::Intent::Out}}, {}, Rank::elemental, IntrinsicClass::atomicSubroutine}, + {"atomic_xor", + {{"atom", AtomicInt, Rank::atom, Optionality::required, + common::Intent::InOut}, + {"value", AnyInt, Rank::scalar, Optionality::required, + common::Intent::In}, + {"stat", AnyInt, Rank::scalar, Optionality::optional, + common::Intent::Out}}, + {}, Rank::elemental, IntrinsicClass::atomicSubroutine}, {"co_broadcast", {{"a", AnyData, Rank::anyOrAssumedRank, Optionality::required, common::Intent::InOut}, @@ -1373,7 +1381,7 @@ static const IntrinsicInterface intrinsicSubroutine[]{ }; // TODO: Intrinsic subroutine EVENT_QUERY -// TODO: Atomic intrinsic subroutines: ATOMIC_ADD &al. +// TODO: Atomic intrinsic subroutines: ATOMIC_ADD // TODO: Collective intrinsic subroutines: co_reduce // Finds a built-in derived type and returns it as a DynamicType. 
@@ -2761,7 +2769,8 @@ static bool ApplySpecificChecks(SpecificCall &call, FoldingContext &context) { } } else if (name == "associated") { return CheckAssociated(call, context); - } else if (name == "atomic_and" || name == "atomic_or") { + } else if (name == "atomic_and" || name == "atomic_or" || + name == "atomic_xor") { return CheckForCoindexedObject(context, call.arguments[2], name, "stat"); } else if (name == "atomic_cas") { return CheckForCoindexedObject(context, call.arguments[4], name, "stat"); diff --git a/flang/test/Semantics/atomic11.f90 b/flang/test/Semantics/atomic11.f90 index d46c7af1b03b5..1c50825e5541f 100644 --- a/flang/test/Semantics/atomic11.f90 +++ b/flang/test/Semantics/atomic11.f90 @@ -1,10 +1,9 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -! XFAIL: * ! This test checks for semantic errors in atomic_xor subroutine calls based on ! the interface defined in section 16.9.30 of the Fortran 2018 standard. program test_atomic_xor - use iso_fortran_env, only: atomic_int_kind + use iso_fortran_env, only: atomic_int_kind, atomic_logical_kind implicit none integer(kind=atomic_int_kind) :: scalar_coarray[*], non_scalar_coarray(10)[*], val, non_coarray @@ -13,6 +12,7 @@ program test_atomic_xor integer(kind=1) :: kind1_coarray[*] real :: non_integer_coarray[*] logical :: non_integer + logical(atomic_logical_kind) :: atomic_logical[*] !___ standard-conforming calls ___ call atomic_xor(scalar_coarray, val) @@ -27,13 +27,16 @@ program test_atomic_xor !___ non-standard-conforming calls ___ - !ERROR: 'atom=' argument must be a scalar coarray for intrinsic 'atomic_xor' + !ERROR: 'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' call atomic_xor(non_scalar_coarray, val) - !ERROR: 'atom=' argument must be a coarray or a coindexed object for intrinsic 'atomic_xor' + !ERROR: 'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' + call atomic_xor(non_scalar_coarray[1], val) + + !ERROR: 
'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' call atomic_xor(non_coarray, val) - !ERROR: 'atom=' argument must be a coarray or a coindexed object for intrinsic 'atomic_xor' + !ERROR: 'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' call atomic_xor(array, val) !ERROR: Actual argument for 'atom=' must have kind=atomic_int_kind, but is 'INTEGER(4)' @@ -45,6 +48,9 @@ program test_atomic_xor !ERROR: Actual argument for 'atom=' has bad type 'REAL(4)' call atomic_xor(non_integer_coarray, val) + !ERROR: Actual argument for 'atom=' has bad type 'LOGICAL(8)' + call atomic_xor(atomic_logical, val) + !ERROR: 'value=' argument has unacceptable rank 1 call atomic_xor(scalar_coarray, array) @@ -57,9 +63,11 @@ program test_atomic_xor !ERROR: 'stat=' argument has unacceptable rank 1 call atomic_xor(scalar_coarray, val, status_array) + !ERROR: 'stat' argument to 'atomic_xor' may not be a coindexed object call atomic_xor(scalar_coarray, val, coindexed_status[1]) - !ERROR: Actual argument associated with INTENT(OUT) dummy argument 'stat=' must be definable + !ERROR: Actual argument associated with INTENT(OUT) dummy argument 'stat=' is not definable + !BECAUSE: '1_4' is not a variable or pointer call atomic_xor(scalar_coarray, val, 1) !ERROR: missing mandatory 'atom=' argument From 32a2af44e1e882f13d1cc2817f0a8d4d8b375d4d Mon Sep 17 00:00:00 2001 From: Sam James Date: Tue, 8 Nov 2022 01:36:43 +0000 Subject: [PATCH 490/516] [CMake] Fix -Wstrict-prototypes Fixes warnings (or errors, if someone injects -Werror in their build system, which happens in fact with some folks vendoring LLVM too) with Clang 16: ``` +/var/tmp/portage.notmp/portage/sys-devel/llvm-15.0.4/work/llvm_build-abi_x86_64.amd64/CMakeFiles/CMakeTmp/src.c:3:9: warning: a function declaration without a prototype is deprecated in all versions of C [-Wstrict-prototypes] 
-/var/tmp/portage.notmp/portage/sys-devel/llvm-14.0.4/work/llvm_build-abi_x86_64.amd64/CMakeFiles/CMakeTmp/src.c:3:9: error: a function declaration without a prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes] int main() {return 0;} ^ void ``` Differential Revision: https://reviews.llvm.org/D137503 --- .../cmake/Modules/CompilerRTDarwinUtils.cmake | 2 +- compiler-rt/cmake/config-ix.cmake | 2 +- compiler-rt/lib/builtins/CMakeLists.txt | 2 +- libcxx/cmake/config-ix.cmake | 2 +- libcxxabi/cmake/config-ix.cmake | 2 +- libunwind/cmake/config-ix.cmake | 2 +- lldb/tools/debugserver/source/CMakeLists.txt | 2 +- llvm/cmake/config-ix.cmake | 2 +- llvm/cmake/modules/FindFFI.cmake | 2 +- llvm/cmake/modules/FindTerminfo.cmake | 2 +- llvm/cmake/modules/FindZ3.cmake | 3 ++- llvm/cmake/modules/HandleLLVMOptions.cmake | 2 +- openmp/runtime/cmake/config-ix.cmake | 2 +- polly/lib/External/CMakeLists.txt | 24 +++++++++---------- 14 files changed, 26 insertions(+), 25 deletions(-) diff --git a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake index e2506872751f9..e372da0d99ba0 100644 --- a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake @@ -116,7 +116,7 @@ function(darwin_test_archs os valid_archs) if(NOT TEST_COMPILE_ONLY) message(STATUS "Finding valid architectures for ${os}...") set(SIMPLE_C ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.c) - file(WRITE ${SIMPLE_C} "#include \nint main() { printf(__FILE__); return 0; }\n") + file(WRITE ${SIMPLE_C} "#include \nint main(void) { printf(__FILE__); return 0; }\n") set(os_linker_flags) foreach(flag ${DARWIN_${os}_LINK_FLAGS}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index da86bdcdcf169..f6190ee60e3c3 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -224,7 +224,7 @@ set(COMPILER_RT_SUPPORTED_ARCH) # runtime 
libraries supported by our current compilers cross-compiling # abilities. set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.cc) -file(WRITE ${SIMPLE_SOURCE} "#include \n#include \nint main() { printf(\"hello, world\"); }\n") +file(WRITE ${SIMPLE_SOURCE} "#include \n#include \nint main(void) { printf(\"hello, world\"); }\n") # Detect whether the current target platform is 32-bit or 64-bit, and setup # the correct commandline flags needed to attempt to target 32-bit and 64-bit. diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index fd3d3956439d2..42015ef8f36d6 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -755,7 +755,7 @@ else () SOURCE "#if !(__ARM_FP & 0x8) #error No double-precision support! #endif - int main() { return 0; }") + int main(void) { return 0; }") if(NOT COMPILER_RT_HAS_${arch}_VFP_DP) list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_DP_SOURCES}) endif() diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake index a5ce4745a5f6a..3bae536436835 100644 --- a/libcxx/cmake/config-ix.cmake +++ b/libcxx/cmake/config-ix.cmake @@ -98,7 +98,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake index ff9a1bf349e52..f4ee8946c1fea 100644 --- a/libcxxabi/cmake/config-ix.cmake +++ b/libcxxabi/cmake/config-ix.cmake @@ -81,7 +81,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() 
endif() diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake index 96cb8afcb485c..d311477f02c69 100644 --- a/libunwind/cmake/config-ix.cmake +++ b/libunwind/cmake/config-ix.cmake @@ -85,7 +85,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index f636e387bf1f0..c6e7e8cf49e85 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -95,7 +95,7 @@ check_c_source_compiles( #else #error Not building for ARM64 #endif - int main() { return 0; } + int main(void) { return 0; } " BUILDING_FOR_ARM64_OSX ) diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index ba85da6a8c3d0..15a7d78b3ac43 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -71,7 +71,7 @@ if(APPLE) CHECK_C_SOURCE_COMPILES(" static const char *__crashreporter_info__ = 0; asm(\".desc ___crashreporter_info__, 0x10\"); - int main() { return 0; }" + int main(void) { return 0; }" HAVE_CRASHREPORTER_INFO) endif() diff --git a/llvm/cmake/modules/FindFFI.cmake b/llvm/cmake/modules/FindFFI.cmake index b0d859af89598..a493a89d63017 100644 --- a/llvm/cmake/modules/FindFFI.cmake +++ b/llvm/cmake/modules/FindFFI.cmake @@ -45,7 +45,7 @@ if(FFI_LIBRARIES) struct ffi_cif; typedef struct ffi_cif ffi_cif; void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue); - int main() { ffi_call(0, 0, 0, 0); }" + int main(void) { ffi_call(0, 0, 0, 0); }" HAVE_FFI_CALL) cmake_pop_check_state() endif() diff --git a/llvm/cmake/modules/FindTerminfo.cmake b/llvm/cmake/modules/FindTerminfo.cmake index 65edb80fa69a8..eef1f95853eb2 100644 --- 
a/llvm/cmake/modules/FindTerminfo.cmake +++ b/llvm/cmake/modules/FindTerminfo.cmake @@ -20,7 +20,7 @@ if(Terminfo_LIBRARIES) list(APPEND CMAKE_REQUIRED_LIBRARIES ${Terminfo_LIBRARIES}) check_c_source_compiles(" int setupterm(char *term, int filedes, int *errret); - int main() { return setupterm(0, 0, 0); }" + int main(void) { return setupterm(0, 0, 0); }" Terminfo_LINKABLE) cmake_pop_check_state() endif() diff --git a/llvm/cmake/modules/FindZ3.cmake b/llvm/cmake/modules/FindZ3.cmake index afb2c31756419..72fb5a96a52b9 100644 --- a/llvm/cmake/modules/FindZ3.cmake +++ b/llvm/cmake/modules/FindZ3.cmake @@ -18,8 +18,9 @@ function(check_z3_version z3_include z3_lib) # The program that will be executed to print Z3's version. file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp "#include + #include #include - int main() { + int main(void) { unsigned int major, minor, build, rev; Z3_get_version(&major, &minor, &build, &rev); printf(\"%u.%u.%u\", major, minor, build); diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 7828e8a1627f2..abf10df855047 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -779,7 +779,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) # line is also a // comment. 
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment") - CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}" + CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main(void) {return 0;}" C_WCOMMENT_ALLOWS_LINE_WRAP) set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP) diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index ac6c81670211b..aa79c2a605f95 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -27,7 +27,7 @@ function(libomp_check_version_symbols retval) void func2() { printf(\"World\"); } __asm__(\".symver func1, func@VER1\"); __asm__(\".symver func2, func@VER2\"); - int main() { + int main(void) { func1(); func2(); return 0; diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt index 2f912e7daeb21..c0a5b32e283f2 100644 --- a/polly/lib/External/CMakeLists.txt +++ b/polly/lib/External/CMakeLists.txt @@ -64,7 +64,7 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" ${_includes} ${_type} typeVar; - int main() { + int main(void) { return 0; } " ${_variable}) @@ -73,7 +73,7 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" int func(void) __attribute__((__warn_unused_result__)); - int main() { return 0; } + int main(void) { return 0; } " HAS_ATTRIBUTE_WARN_UNUSED_RESULT) set(GCC_WARN_UNUSED_RESULT) if (HAS_ATTRIBUTE_WARN_UNUSED_RESULT) @@ -82,22 +82,22 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" __attribute__ ((unused)) static void foo(void); - int main() { return 0; } + int main(void) { return 0; } " HAVE___ATTRIBUTE__) check_c_source_compiles_numeric(" #include - int main() { (void)ffs(0); return 0; } + int main(void) { (void)ffs(0); return 0; } " HAVE_DECL_FFS) check_c_source_compiles_numeric(" - int main() { (void)__builtin_ffs(0); return 0; } + int main(void) { (void)__builtin_ffs(0); return 0; } " HAVE_DECL___BUILTIN_FFS) 
check_c_source_compiles_numeric(" #include - int main() { (void)_BitScanForward(NULL, 0); return 0; } + int main(void) { (void)_BitScanForward(NULL, 0); return 0; } " HAVE_DECL__BITSCANFORWARD) if (NOT HAVE_DECL_FFS AND @@ -109,12 +109,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { (void)strcasecmp(\"\", \"\"); return 0; } + int main(void) { (void)strcasecmp(\"\", \"\"); return 0; } " HAVE_DECL_STRCASECMP) check_c_source_compiles_numeric(" #include - int main() { (void)_stricmp(\"\", \"\"); return 0; } + int main(void) { (void)_stricmp(\"\", \"\"); return 0; } " HAVE_DECL__STRICMP) if (NOT HAVE_DECL_STRCASECMP AND NOT HAVE_DECL__STRICMP) @@ -124,12 +124,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { (void)strncasecmp(\"\", \"\", 0); return 0; } + int main(void) { (void)strncasecmp(\"\", \"\", 0); return 0; } " HAVE_DECL_STRNCASECMP) check_c_source_compiles_numeric(" #include - int main() { (void)_strnicmp(\"\", \"\", 0); return 0; } + int main(void) { (void)_strnicmp(\"\", \"\", 0); return 0; } " HAVE_DECL__STRNICMP) if (NOT HAVE_DECL_STRNCASECMP AND NOT HAVE_DECL__STRNICMP) @@ -139,12 +139,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { snprintf((void*)0, 0, \" \"); return 0; } + int main(void) { snprintf((void*)0, 0, \" \"); return 0; } " HAVE_DECL_SNPRINTF) check_c_source_compiles_numeric(" #include - int main() { _snprintf((void*)0, 0, \" \"); return 0; } + int main(void) { _snprintf((void*)0, 0, \" \"); return 0; } " HAVE_DECL__SNPRINTF) if (NOT HAVE_DECL_SNPRINTF AND NOT HAVE_DECL__SNPRINTF) From 674a17e9bbe82e8c53952fd94dcd862b17cb2d2f Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Mon, 7 Nov 2022 17:43:41 -0800 Subject: [PATCH 491/516] MIPS/compiler_rt: use synci to flush icache on r6 syscall makes it failed to build on mips64 for mipsel: ``` compiler-rt/lib/builtins/clear_cache.c:97:3: error: call to undeclared function 'syscall'; ISO 
C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); ``` In this patch, we use `rdhwr` to get synci_step. If synci_step is zero, it means that the hardware will maintain the coherence. We need to do nothing. Then for r6+, `synci` is required to keep icache global. So we can use `synci` to flush icache. The ISA documents ask a `sync` and a `jr.hb` after `synci`. For pre-r6, we can use cacheflush libc function, which is same on Linux and FreeBSD. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D135565 --- compiler-rt/lib/builtins/clear_cache.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index bcc5922e073b3..8993761eb3d42 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -93,12 +93,29 @@ void __clear_cache(void *start, void *end) { #endif #elif defined(__linux__) && defined(__loongarch__) __asm__ volatile("ibar 0"); -#elif defined(__linux__) && defined(__mips__) +#elif defined(__mips__) const uintptr_t start_int = (uintptr_t)start; const uintptr_t end_int = (uintptr_t)end; - syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); -#elif defined(__mips__) && defined(__OpenBSD__) - cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE); + uintptr_t synci_step; + __asm__ volatile("rdhwr %0, $1" : "=r"(synci_step)); + if (synci_step != 0) { +#if __mips_isa_rev >= 6 + for (uintptr_t p = start_int; p < end_int; p += synci_step) + __asm__ volatile("synci 0(%0)" : : "r"(p)); + + // The last "move $at, $0" is the target of jr.hb instead of delay slot. + __asm__ volatile(".set noat\n" + "sync\n" + "addiupc $at, 12\n" + "jr.hb $at\n" + "move $at, $0\n" + ".set at"); +#else + // Pre-R6 may not be globalized. And some implementations may give strange + // synci_step. 
So, let's use libc call for it. + cacheflush(start, end_int - start_int, BCACHE); +#endif + } #elif defined(__aarch64__) && !defined(__APPLE__) uint64_t xstart = (uint64_t)(uintptr_t)start; uint64_t xend = (uint64_t)(uintptr_t)end; From 7bf3cb3ee871b6707b7f5114a7fde61485df450e Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Mon, 7 Nov 2022 13:20:48 -0800 Subject: [PATCH 492/516] [lldb] Fix issue with re.Pattern availability `re.Pattern` is introduced in Python 3.7. To support Python 3.6, fallback to typechecking against `SRE_Pattern`. Differential Revision: https://reviews.llvm.org/D137582 --- lldb/packages/Python/lldbsuite/test/decorators.py | 4 +++- lldb/packages/Python/lldbsuite/test/lldbtest.py | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 3e3db099cd4a6..dd47f6845ef2f 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -80,7 +80,9 @@ def _match_decorator_property(expected, actual): if isinstance(expected, no_match): return not _match_decorator_property(expected.item, actual) - if isinstance(expected, (re.Pattern, str)): + # Python 3.6 doesn't declare a `re.Pattern` type, get the dynamic type. + pattern_type = type(re.compile('')) + if isinstance(expected, (pattern_type, str)): return re.search(expected, actual) is not None if hasattr(expected, "__iter__"): diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index 2d054f971cd02..63bad9d0241de 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -282,11 +282,14 @@ def check_value(self, test_base, val, error_msg=None): test_base.assertSuccess(val.GetError()) + # Python 3.6 doesn't declare a `re.Pattern` type, get the dynamic type. 
+ pattern_type = type(re.compile('')) + if self.expect_name: test_base.assertEqual(self.expect_name, val.GetName(), this_error_msg) if self.expect_value: - if isinstance(self.expect_value, re.Pattern): + if isinstance(self.expect_value, pattern_type): test_base.assertRegex(val.GetValue(), self.expect_value, this_error_msg) else: @@ -296,7 +299,7 @@ def check_value(self, test_base, val, error_msg=None): test_base.assertEqual(self.expect_type, val.GetDisplayTypeName(), this_error_msg) if self.expect_summary: - if isinstance(self.expect_summary, re.Pattern): + if isinstance(self.expect_summary, pattern_type): test_base.assertRegex(val.GetSummary(), self.expect_summary, this_error_msg) else: From e72fb692104ebab682d1ea5aeec39358b11ad407 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Mon, 7 Nov 2022 19:38:36 -0800 Subject: [PATCH 493/516] [mlir][arith] Convert fastmath to LLVM dialect for some arith ops. This is a follow-up on D126305 and D136225. We can now preserve fastmath for arith::MaxFOp,MinFOp,RemFOp during ArithToLLVM conversion. 
Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D137456 --- .../Conversion/ArithToLLVM/ArithToLLVM.cpp | 18 +++++----- .../Conversion/ArithToLLVM/arith-to-llvm.mlir | 33 ++++++++++++++++--- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index 1409b7fe1bca8..3ad01556b2f69 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -52,16 +52,16 @@ using FPToSIOpLowering = VectorConvertToLLVMPattern; using FPToUIOpLowering = VectorConvertToLLVMPattern; -// TODO: Add LLVM intrinsic support for fastmath -using MaxFOpLowering = VectorConvertToLLVMPattern; +using MaxFOpLowering = + VectorConvertToLLVMPattern; using MaxSIOpLowering = VectorConvertToLLVMPattern; using MaxUIOpLowering = VectorConvertToLLVMPattern; -// TODO: Add LLVM intrinsic support for fastmath -using MinFOpLowering = VectorConvertToLLVMPattern; +using MinFOpLowering = + VectorConvertToLLVMPattern; using MinSIOpLowering = VectorConvertToLLVMPattern; using MinUIOpLowering = @@ -74,9 +74,9 @@ using NegFOpLowering = VectorConvertToLLVMPattern; using OrIOpLowering = VectorConvertToLLVMPattern; -// TODO: Add LLVM intrinsic support for fastmath -using RemFOpLowering = VectorConvertToLLVMPattern; +using RemFOpLowering = + VectorConvertToLLVMPattern; using RemSIOpLowering = VectorConvertToLLVMPattern; using RemUIOpLowering = diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index eccd8755d7aa8..d8e49a55c2ad7 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -453,11 +453,11 @@ func.func @minmaxf(%arg0 : f32, %arg1 : f32) -> f32 { // CHECK-LABEL: @fastmath func.func @fastmath(%arg0: f32, %arg1: f32, %arg2: i32) { -// CHECK: {{.*}} = llvm.fadd %arg0, %arg1 {fastmathFlags = 
#llvm.fastmath} : f32 -// CHECK: {{.*}} = llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 -// CHECK: {{.*}} = llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 -// CHECK: {{.*}} = llvm.fadd %arg0, %arg1 : f32 -// CHECK: {{.*}} = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fadd %arg0, %arg1 : f32 +// CHECK: llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 %0 = arith.addf %arg0, %arg1 fastmath : f32 %1 = arith.mulf %arg0, %arg1 fastmath : f32 %2 = arith.negf %arg0 fastmath : f32 @@ -465,3 +465,26 @@ func.func @fastmath(%arg0: f32, %arg1: f32, %arg2: i32) { %4 = arith.addf %arg0, %arg1 fastmath : f32 return } + +// ----- + +// CHECK-LABEL: @ops_supporting_fastmath +func.func @ops_supporting_fastmath(%arg0: f32, %arg1: f32, %arg2: i32) { +// CHECK: llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %0 = arith.addf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fdiv %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %1 = arith.divf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.intr.maxnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + %2 = arith.maxf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.intr.minnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + %3 = arith.minf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %4 = arith.mulf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 + %5 = arith.negf %arg0 fastmath : f32 +// CHECK: llvm.frem %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %6 = arith.remf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fsub %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %7 = arith.subf %arg0, %arg1 fastmath : 
f32 + return +} From 9d96feb19b57526eca19cf6e27e61b8028a4bffc Mon Sep 17 00:00:00 2001 From: skc7 Date: Mon, 7 Nov 2022 22:06:12 +0530 Subject: [PATCH 494/516] [SLP][NFC] Restructure areTwoInsertFromSameBuildVector Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D137569 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ba44d4a77ca3a..ac2397beb4926 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7153,8 +7153,10 @@ static bool areTwoInsertFromSameBuildVector( return false; auto *IE1 = VU; auto *IE2 = V; - unsigned Idx1 = *getInsertIndex(IE1); - unsigned Idx2 = *getInsertIndex(IE2); + Optional Idx1 = getInsertIndex(IE1); + Optional Idx2 = getInsertIndex(IE2); + if (Idx1 == None || Idx2 == None) + return false; // Go through the vector operand of insertelement instructions trying to find // either VU as the original vector for IE2 or V as the original vector for // IE1. @@ -7165,14 +7167,14 @@ static bool areTwoInsertFromSameBuildVector( return V->hasOneUse(); if (IE1) { if ((IE1 != VU && !IE1->hasOneUse()) || - getInsertIndex(IE1).value_or(Idx2) == Idx2) + getInsertIndex(IE1).value_or(*Idx2) == *Idx2) IE1 = nullptr; else IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); } if (IE2) { if ((IE2 != V && !IE2->hasOneUse()) || - getInsertIndex(IE2).value_or(Idx1) == Idx1) + getInsertIndex(IE2).value_or(*Idx1) == *Idx1) IE2 = nullptr; else IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); From 4638ba7b45f896132f3ee8c665ed390167b5d6b4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 22 Oct 2022 10:05:54 -0700 Subject: [PATCH 495/516] llvm-reduce: Try to turn calls into something else Try to turn calls that look like operators into known intrinsics. 
Also try to turn calls that look like a load or a store into a load or store. --- .../reduce-opcodes-call-typed-pointers.ll | 87 +++++ .../tools/llvm-reduce/reduce-opcodes-call.ll | 335 ++++++++++++++++++ .../llvm-reduce/deltas/ReduceOpcodes.cpp | 148 ++++++++ 3 files changed, 570 insertions(+) create mode 100644 llvm/test/tools/llvm-reduce/reduce-opcodes-call-typed-pointers.ll create mode 100644 llvm/test/tools/llvm-reduce/reduce-opcodes-call.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-opcodes-call-typed-pointers.ll b/llvm/test/tools/llvm-reduce/reduce-opcodes-call-typed-pointers.ll new file mode 100644 index 0000000000000..f944b1d763b02 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-opcodes-call-typed-pointers.ll @@ -0,0 +1,87 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=opcodes --test FileCheck --test-arg --check-prefix=ALL --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,ALL %s < %t + +target datalayout = "A5" + +; ALL-LABEL: @call_void_no_args( +; RESULT-NEXT: store volatile i32 0, i32 addrspace(5)* null, align 4 +; RESULT-NEXT: ret void +define void @call_void_no_args() { + call void @void_no_args() + ret void +} + +; ALL-LABEL: @call_load_like_i32( +; RESULT-NEXT: %op = load volatile i32, i32 addrspace(1)* %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_i32(i32 addrspace(1)* %ptr) { + %op = call i32 @load_like_i32(i32 addrspace(1)* %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_ptr_ptr( +; RESULT-NEXT: %op = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(3)* %ptr, align 8 +; RESULT-NEXT: ret i32 addrspace(1)* %op +define i32 addrspace(1)* @call_load_like_ptr_ptr(i32 addrspace(1)* addrspace(3)* %ptr) { + %op = call i32 addrspace(1)* @load_like_ptr_ptr(i32 addrspace(1)* addrspace(3)* %ptr) + ret i32 addrspace(1)* %op +} + +; ALL-LABEL: @call_store_like_i16( +; RESULT-NEXT: store volatile i16 %val, i16 addrspace(1)* %ptr, align 2 +; 
RESULT-NEXT: ret void +define void @call_store_like_i16(i16 %val, i16 addrspace(1)* %ptr) { + call void @store_like_i16(i16 %val, i16 addrspace(1)* %ptr) + ret void +} + +; ALL-LABEL: @call_load_like_ptr_mismatch( +; RESULT-NEXT: %op = call i32 @load_like_ptr_mismatch(i16 addrspace(1)* %ptr) +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_ptr_mismatch(i16 addrspace(1)* %ptr) { + %op = call i32 @load_like_ptr_mismatch(i16 addrspace(1)* %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_store_like_ptr_store( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store(i32 addrspace(3)* %ptr.val, i32 addrspace(1)* %ptr) { + call void @store_like_ptr_store(i32 addrspace(3)* %ptr.val, i32 addrspace(1)* %ptr) + ret void +} + + +; ALL-LABEL: @call_store_like_ptr_store_swap( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_swap(i32 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) { + call void @store_like_ptr_store_swap(i32 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_different_element_type(i32 addrspace(3)* %ptr.val, i16 addrspace(1)* %ptr) { + call void @store_like_ptr_store_different_element_type(i32 addrspace(3)* %ptr.val, i16 addrspace(1)* %ptr) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type_swap( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_different_element_type_swap(i16 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) { + call void @store_like_ptr_store_different_element_type_swap(i16 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) + ret void +} + +declare void @void_no_args() +declare i32 addrspace(1)* @load_like_ptr_ptr(i32 addrspace(1)* addrspace(3)*) +declare i32 @load_like_i32(i32 addrspace(1)*) +declare void @store_like_i16(i16 %val, i16 addrspace(1)* %ptr) 
+declare i32 @load_like_ptr_mismatch(i16 addrspace(1)*) +declare void @store_like_ptr_store(i32 addrspace(3)* %ptr.val, i32 addrspace(1)* %ptr) +declare void @store_like_ptr_store_swap(i32 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) +declare void @store_like_ptr_store_different_element_type(i32 addrspace(3)* %ptr.val, i16 addrspace(1)* %ptr) +declare void @store_like_ptr_store_different_element_type_swap(i16 addrspace(1)*, i32 addrspace(3)*) diff --git a/llvm/test/tools/llvm-reduce/reduce-opcodes-call.ll b/llvm/test/tools/llvm-reduce/reduce-opcodes-call.ll new file mode 100644 index 0000000000000..c63ba522abef8 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-opcodes-call.ll @@ -0,0 +1,335 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=opcodes --test FileCheck --test-arg --check-prefix=ALL --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,ALL %s < %t + +target datalayout = "A5" + +declare token @llvm.return.token() +declare void @llvm.uses.token(token) + +; ALL-LABEL: @call_token( +; RESULT-NEXT: %token = call token @llvm.return.token() +; RESULT-NEXT: call void @llvm.uses.token(token %token) +; RESULT-NEXT: ret void +define void @call_token() { + %token = call token @llvm.return.token() + call void @llvm.uses.token(token %token) + ret void +} + +; ALL-LABEL: @call_void_0_size_arg( +; RESULT-NEXT: store volatile {} %arg, ptr addrspace(5) null, align 1 +; RESULT-NEXT: ret void +define void @call_void_0_size_arg({} %arg) { + call void @void_0_size_arg({} %arg) + ret void +} + +; ALL-LABEL: @call_return_0_size( +; RESULT-NEXT: %op = load volatile {}, ptr %ptr, align 1 +; RESULT-NEXT: ret {} %op +define {} @call_return_0_size(ptr %ptr) { + %op = call {} @return_0_size(ptr %ptr) + ret {} %op +} + +; ALL-LABEL: define void @call_void_no_args( +; RESULT-NEXT: store volatile i32 0, ptr addrspace(5) null, align 4 +; RESULT-NEXT: ret void +define void @call_void_no_args() { + call void @void_no_args() + ret 
void +} + +; ALL-LABEL: @call_store_like_i16( +; RESULT-NEXT: store volatile i16 %val, ptr addrspace(1) %ptr, align 2 +; RESULT-NEXT: ret void +define void @call_store_like_i16(i16 %val, ptr addrspace(1) %ptr) { + call void @store_like_i16(i16 %val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: @keep_call_store_like_i16( +; ALL-NEXT: call void @store_like_i16(i16 %val, ptr addrspace(1) %ptr) +; ALL-NEXT: ret void +define void @keep_call_store_like_i16(i16 %val, ptr addrspace(1) %ptr) { + call void @store_like_i16(i16 %val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: @call_store_like_i16_swap( +; RESULT-NEXT: store volatile i16 %val, ptr addrspace(1) %ptr +; RESULT-NEXT: ret void +define void @call_store_like_i16_swap(ptr addrspace(1) %ptr, i16 %val) { + call void @store_like_i16_swap(ptr addrspace(1) %ptr, i16 %val) + ret void +} + +; ALL-LABEL: @call_store_like_i16_extra_arg( +; RESULT-NEXT: call void @store_like_i16_extra_arg(i16 %val, ptr addrspace(1) %ptr, i32 %extra) +; RESULT-NEXT: ret void +define void @call_store_like_i16_extra_arg(i16 %val, ptr addrspace(1) %ptr, i32 %extra) { + call void @store_like_i16_extra_arg(i16 %val, ptr addrspace(1) %ptr, i32 %extra) + ret void +} + +; ALL-LABEL: @call_store_like_i16_extra_ptr_arg( +; RESULT-NEXT: call void @store_like_i16_extra_ptr_arg(i16 %val, ptr addrspace(1) %ptr, ptr addrspace(1) %extra) +; RESULT-NEXT: ret void +define void @call_store_like_i16_extra_ptr_arg(i16 %val, ptr addrspace(1) %ptr, ptr addrspace(1) %extra) { + call void @store_like_i16_extra_ptr_arg(i16 %val, ptr addrspace(1) %ptr, ptr addrspace(1) %extra) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store( +; RESULT-NEXT: store volatile ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) { + call void @store_like_ptr_store(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: 
@call_store_like_ptr_store_swap( +; RESULT-NEXT: store volatile ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) { + call void @store_like_ptr_store_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type( +; RESULT-NEXT: store volatile ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_different_element_type(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) { + call void @store_like_ptr_store_different_element_type(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type_swap( +; RESULT-NEXT: store volatile ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_different_element_type_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) { + call void @store_like_ptr_store_different_element_type_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) + ret void +} + +; ALL-LABEL: @call_load_like_i32( +; RESULT-NEXT: %op = load volatile i32, ptr addrspace(1) %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_i32(ptr addrspace(1) %ptr) { + %op = call i32 @load_like_i32(ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @keep_call_load_like_i32( +; ALL-NEXT: %op = call i32 @load_like_i32(ptr addrspace(1) %ptr) +; ALL-NEXT: ret i32 %op +define i32 @keep_call_load_like_i32(ptr addrspace(1) %ptr) { + %op = call i32 @load_like_i32(ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_i32_extra_arg( +; RESULT-NEXT: %op = call i32 @load_like_i32_extra_arg(ptr addrspace(1) %ptr, i32 %extra) +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_i32_extra_arg(ptr addrspace(1) %ptr, i32 %extra) { + %op = call i32 
@load_like_i32_extra_arg(ptr addrspace(1) %ptr, i32 %extra) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_ptr_mismatch( +; RESULT-NEXT: %op = load volatile i32, ptr addrspace(1) %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_ptr_mismatch(ptr addrspace(1) %ptr) { + %op = call i32 @load_like_ptr_mismatch(ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_skip_arg( +; RESULT-NEXT: %op = load volatile i32, ptr addrspace(1) %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_skip_arg(float, ptr addrspace(1) %ptr) { + %op = call i32 @load_like_skip_arg(float poison, ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_fp_scalar_noargs( +; RESULT-NEXT: %op = load volatile float, ptr addrspace(5) null, align 4 +; RESULT-NEXT: ret float %op +define float @call_fp_scalar_noargs() { + %op = call nsz float @fp_scalar_noargs() + ret float %op +} + +; ALL-LABEL: @call_fp_vector_noargs( +; RESULT-NEXT: %op = load volatile <2 x half>, ptr addrspace(5) null, align 4 +; RESULT-NEXT: ret <2 x half> %op +define <2 x half> @call_fp_vector_noargs() { + %op = call nsz <2 x half> @fp_vector_noargs() + ret <2 x half> %op +} + +; ALL-LABEL: @call_unary_fp_scalar( +; RESULT-NEXT: %op = fneg nsz float %a +; RESULT-NEXT: ret float %op +define float @call_unary_fp_scalar(float %a) { + %op = call nsz float @unary_fp_scalar(float %a) + ret float %op +} + +; ALL-LABEL: @call_unary_fp_vector( +; RESULT-NEXT: %op = fneg nsz <2 x half> %a +; RESULT-NEXT: ret <2 x half> %op +define <2 x half> @call_unary_fp_vector(<2 x half> %a) { + %op = call nsz <2 x half> @unary_fp_vector(<2 x half> %a) + ret <2 x half> %op +} + +; ALL-LABEL: @ignore_undef_args_unary_fp( +; RESULT-NEXT: %op = fneg nnan float %a +; RESULT-NEXT: ret float %op +define float @ignore_undef_args_unary_fp(float %a) { + %op = call nnan float @func_i32_f32_i32(i32 poison, float %a, i32 poison) + ret float %op +} + +; ALL-LABEL: @call_binary_fp_scalar( +; RESULT: %op = 
fmul afn float %a, %b +; RESULT-NEXT: ret float %op +define float @call_binary_fp_scalar(float %a, float %b) { + %op = call afn float @binary_fp_scalar(float %a, float %b) + ret float %op +} + +; ALL-LABEL: @call_binary_fp_vector( +; RESULT-NEXT: %op = fmul afn <2 x half> %a, %b +; RESULT-NEXT: ret <2 x half> %op +define <2 x half> @call_binary_fp_vector(<2 x half> %a, <2 x half> %b) { + %op = call afn <2 x half> @binary_fp_vector(<2 x half> %a, <2 x half> %b) + ret <2 x half> %op +} + +; ALL-LABEL: @call_ternary_fp_scalar( +; RESULT-NEXT: %op = call afn float @llvm.fma.f32(float %a, float %b, float %c) +; RESULT-NEXT: ret float %op +define float @call_ternary_fp_scalar(float %a, float %b, float %c) { + %op = call afn float @ternary_fp_scalar(float %a, float %b, float %c) + ret float %op +} + +; ALL-LABEL: @call_ternary_fp_vector( +; RESULT-NEXT: %op = call afn <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) +; RESULT-NEXT: ret <2 x half> %op +define <2 x half> @call_ternary_fp_vector(<2 x half> %a, <2 x half> %b, <2 x half> %c) { + %op = call afn <2 x half> @ternary_fp_vector(<2 x half> %a, <2 x half> %b, <2 x half> %c) + ret <2 x half> %op +} + +; ALL-LABEL: @call_unary_int_scalar( +; RESULT-NEXT: %op = call i32 @llvm.bswap.i32(i32 %a) +; RESULT-NEXT: ret i32 %op +define i32 @call_unary_int_scalar(i32 %a) { + %op = call i32 @unary_int_scalar(i32 %a) + ret i32 %op +} + +; ALL-LABEL: @call_unary_int_vector( +; RESULT-NEXT: %op = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a) +; RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_unary_int_vector(<2 x i16> %a) { + %op = call <2 x i16> @unary_int_vector(<2 x i16> %a) + ret <2 x i16> %op +} + +; ALL-LABEL: @call_binary_int_scalar( +; RESULT-NEXT: %op = and i32 %a, %b +; RESULT-NEXT: ret i32 %op +define i32 @call_binary_int_scalar(i32 %a, i32 %b) { + %op = call i32 @binary_int_scalar(i32 %a, i32 %b) + ret i32 %op +} + +; ALL-LABEL: @call_binary_int_vector( +; RESULT-NEXT: %op = and <2 x i16> 
%a, %b +; RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_binary_int_vector(<2 x i16> %a, <2 x i16> %b) { + %op = call <2 x i16> @binary_int_vector(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %op +} + +; ALL-LABEL: @call_ternary_int_scalar( +; RESULT-NEXT: %op = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; RESULT-NEXT: ret i32 %op +define i32 @call_ternary_int_scalar(i32 %a, i32 %b, i32 %c) { + %op = call i32 @ternary_int_scalar(i32 %a, i32 %b, i32 %c) + ret i32 %op +} + +; ALL-LABEL: @call_ternary_int_vector( +; RESULT-NEXT: %op = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) +; RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_ternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) { + %op = call <2 x i16> @ternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) + ret <2 x i16> %op +} + +; ALL-LABEL: @call_quaternary_int_scalar( +; RESULT-NEXT: %op = call i32 @quaternary_int_scalar(i32 %a, i32 %b, i32 %c, i32 %d) +; RESULT-NEXT: ret i32 %op +define i32 @call_quaternary_int_scalar(i32 %a, i32 %b, i32 %c, i32 %d) { + %op = call i32 @quaternary_int_scalar(i32 %a, i32 %b, i32 %c, i32 %d) + ret i32 %op +} + +; ALL-LABEL: @call_quaternary_int_vector( +; RESULT-NEXT: %op = call <2 x i16> @quaternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) +; RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_quaternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) { + %op = call <2 x i16> @quaternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) + ret <2 x i16> %op +} + +declare void @void_0_size_arg({}) +declare {} @return_0_size(ptr) +declare void @void_no_args() +declare void @store_like_i16(i16, ptr addrspace(1)) +declare void @store_like_i16_swap(ptr addrspace(1), i16) +declare void @store_like_i16_extra_arg(i16, ptr addrspace(1), i32) +declare void @store_like_i16_extra_ptr_arg(i16, ptr addrspace(1), ptr addrspace(1)) +declare void 
@store_like_ptr_store(ptr addrspace(3), ptr addrspace(1)) +declare void @store_like_ptr_store_swap(ptr addrspace(1), ptr addrspace(3)) +declare void @store_like_ptr_store_different_element_type(ptr addrspace(3), ptr addrspace(1)) +declare void @store_like_ptr_store_different_element_type_swap(ptr addrspace(1), ptr addrspace(3)) +declare i32 @load_like_i32(ptr addrspace(1)) + +declare i32 @load_like_i32_extra_arg(ptr addrspace(1), i32) + +declare i32 @load_like_ptr_mismatch(ptr addrspace(1)) +declare i32 @load_like_skip_arg(float, ptr addrspace(1)) + +declare float @fp_scalar_noargs() +declare i32 @int_scalar_noargs() + +declare <2 x half> @fp_vector_noargs() +declare <2 x i16> @int_vector_noargs() + +declare float @unary_fp_scalar(float) +declare <2 x half> @unary_fp_vector(<2 x half>) +declare float @func_i32_f32_i32(i32, float, i32) + +declare float @binary_fp_scalar(float, float) +declare <2 x half> @binary_fp_vector(<2 x half>, <2 x half>) + +declare float @ternary_fp_scalar(float, float, float) +declare <2 x half> @ternary_fp_vector(<2 x half>, <2 x half>, <2 x half>) + +declare float @quaternary_fp_scalar(float, float, float, float) +declare <2 x half> @quaternary_fp_vector(<2 x half>, <2 x half>, <2 x half>, <2 x half>) + +declare i32 @unary_int_scalar(i32) +declare <2 x i16> @unary_int_vector(<2 x i16>) +declare i32 @binary_int_scalar(i32, i32) +declare <2 x i16> @binary_int_vector(<2 x i16>, <2 x i16>) +declare i32 @ternary_int_scalar(i32, i32, i32) +declare <2 x i16> @ternary_int_vector(<2 x i16>, <2 x i16>, <2 x i16>) +declare i32 @quaternary_int_scalar(i32, i32, i32, i32) +declare <2 x i16> @quaternary_int_vector(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>) diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp index 2e515110517de..75a00ae22ee41 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp @@ -19,6 +19,12 @@ #include 
"llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +// Assume outgoing undef arguments aren't relevant. +// TODO: Maybe skip any trivial constant arguments. +static bool shouldIgnoreArgument(const Value *V) { + return isa(V); +} + static Value *replaceIntrinsic(Module &M, IntrinsicInst *II, Intrinsic::ID NewIID, ArrayRef Tys = None) { @@ -64,8 +70,142 @@ static Value *reduceIntrinsic(Oracle &O, Module &M, IntrinsicInst *II) { } } +/// Look for calls that look like they could be replaced with a load or store. +static bool callLooksLikeLoadStore(CallBase *CB, Value *&DataArg, + Value *&PtrArg) { + const bool IsStore = CB->getType()->isVoidTy(); + + PtrArg = nullptr; + DataArg = nullptr; + for (Value *Arg : CB->args()) { + if (shouldIgnoreArgument(Arg)) + continue; + + if (!Arg->getType()->isSized()) + return false; + + PointerType *PT = dyn_cast(Arg->getType()); + if (!PtrArg && PT) { + // FIXME: Could create bitcast for typed pointers, but roll back unused + // replacement only erases one instruction. + if (!IsStore && !PT->isOpaqueOrPointeeTypeMatches(CB->getType())) + return false; + + PtrArg = Arg; + continue; + } + + if (!IsStore || DataArg) + return false; + + DataArg = Arg; + } + + if (IsStore && !DataArg) { + // FIXME: For typed pointers, use element type? + DataArg = ConstantInt::get(IntegerType::getInt32Ty(CB->getContext()), 0); + } + + // If we didn't find any arguments, we can fill in the pointer. + if (!PtrArg) { + unsigned AS = CB->getModule()->getDataLayout().getAllocaAddrSpace(); + + PointerType *PtrTy = + PointerType::get(DataArg ? DataArg->getType() + : IntegerType::getInt32Ty(CB->getContext()), + AS); + + PtrArg = ConstantPointerNull::get(PtrTy); + } + + // Make sure we don't emit an invalid store with typed pointers. 
+ if (IsStore && DataArg->getType()->getPointerTo( + cast(PtrArg->getType())->getAddressSpace()) != + PtrArg->getType()) + return false; + + return true; +} + +// TODO: Replace 2 pointer argument calls with memcpy +static Value *tryReplaceCallWithLoadStore(Oracle &O, Module &M, CallBase *CB) { + Value *PtrArg = nullptr; + Value *DataArg = nullptr; + if (!callLooksLikeLoadStore(CB, DataArg, PtrArg) || O.shouldKeep()) + return nullptr; + + IRBuilder<> B(CB); + if (DataArg) + return B.CreateStore(DataArg, PtrArg, true); + return B.CreateLoad(CB->getType(), PtrArg, true); +} + +static bool callLooksLikeOperator(CallBase *CB, + SmallVectorImpl &OperatorArgs) { + Type *ReturnTy = CB->getType(); + if (!ReturnTy->isFirstClassType()) + return false; + + for (Value *Arg : CB->args()) { + if (shouldIgnoreArgument(Arg)) + continue; + + if (Arg->getType() != ReturnTy) + return false; + + OperatorArgs.push_back(Arg); + } + + return true; +} + +static Value *tryReplaceCallWithOperator(Oracle &O, Module &M, CallBase *CB) { + SmallVector Arguments; + + if (!callLooksLikeOperator(CB, Arguments) || Arguments.size() > 3) + return nullptr; + + if (O.shouldKeep()) + return nullptr; + + IRBuilder<> B(CB); + if (CB->getType()->isFPOrFPVectorTy()) { + switch (Arguments.size()) { + case 1: + return B.CreateFNeg(Arguments[0]); + case 2: + return B.CreateFMul(Arguments[0], Arguments[1]); + case 3: + return B.CreateIntrinsic(Intrinsic::fma, {CB->getType()}, Arguments); + default: + return nullptr; + } + + llvm_unreachable("all argument sizes handled"); + } + + if (CB->getType()->isIntOrIntVectorTy()) { + switch (Arguments.size()) { + case 1: + return B.CreateUnaryIntrinsic(Intrinsic::bswap, Arguments[0]); + case 2: + return B.CreateAnd(Arguments[0], Arguments[1]); + case 3: + return B.CreateIntrinsic(Intrinsic::fshl, {CB->getType()}, Arguments); + default: + return nullptr; + } + + llvm_unreachable("all argument sizes handled"); + } + + return nullptr; +} + static Value 
*reduceInstruction(Oracle &O, Module &M, Instruction &I) { IRBuilder<> B(&I); + + // TODO: fp binary operator with constant to fneg switch (I.getOpcode()) { case Instruction::FDiv: case Instruction::FRem: @@ -96,6 +236,14 @@ static Value *reduceInstruction(Oracle &O, Module &M, Instruction &I) { if (IntrinsicInst *II = dyn_cast(&I)) return reduceIntrinsic(O, M, II); + CallBase *CB = cast(&I); + + if (Value *NewOp = tryReplaceCallWithOperator(O, M, CB)) + return NewOp; + + if (Value *NewOp = tryReplaceCallWithLoadStore(O, M, CB)) + return NewOp; + return nullptr; } default: From 6ceb607b302f83d9007b679055944cc284e45267 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Tue, 8 Nov 2022 05:43:06 +0000 Subject: [PATCH 496/516] [PowerPC][NFC] remove the rop-protect attribute in LIT cases. This flag will cause LLC warning: "'-rop-protection' is not a recognized feature for this target (ignoring feature)" Remove this unused feature first. We may also need to check why llc emits this warning as we declare '-rop-protection' not '+rop-protection'. 
--- llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll | 4 ++-- llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll | 4 ++-- llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll | 4 ++-- llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll | 4 ++-- llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll index af026593c1ee3..190d57a07b3cf 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll @@ -699,5 +699,5 @@ entry: ; LARGE64-NEXT: .tc GInit[TE],GInit[RW] -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" 
"target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll index 37cba2a90c4ad..34ebe758343e5 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll @@ -714,5 +714,5 @@ entry: ; LARGE64-LABEL: L..C8: ; LARGE64-NEXT: .tc GInit[TE],GInit[RW] -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll index 
098f3ae4afb21..34e83221c4452 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll @@ -754,5 +754,5 @@ entry: ; LARGE64-LABEL: L..C8: ; LARGE64-NEXT: .tc GInit[TE],GInit[RW] -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll index dbdf84fa68ef9..423719ec04c8f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll @@ -610,5 +610,5 @@ entry: ; DIS-NEXT: 8: 3f f0 00 00 ; DIS-NEXT: c: 00 00 00 00 -attributes #0 = { nofree norecurse 
nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll index 108f9758d055f..2550904e65fec 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll @@ -640,5 +640,5 @@ entry: ; DIS: 00000004 (idx: 37) TIUninit[UL]: ; DIS-NEXT: ... 
-attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } From e661185fb3e5a94177404e474d6000f386594090 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 16 Oct 2022 22:49:30 -0700 Subject: [PATCH 497/516] InstCombine: Fold fdiv nnan x, 0 -> copysign(inf, x) https://alive2.llvm.org/ce/z/gLBFKB --- .../Transforms/InstCombine/InstCombineInternal.h | 1 + .../Transforms/InstCombine/InstCombineMulDivRem.cpp | 13 ++++++++++++- llvm/test/Transforms/InstCombine/fdiv.ll | 8 ++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 11aed7754c264..cc1dedf372752 100644 
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -370,6 +370,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *foldExtractOfOverflowIntrinsic(ExtractValueInst &EV); Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II); Instruction *foldFPSignBitOps(BinaryOperator &I); + Instruction *foldFDivConstantDivisor(BinaryOperator &I); // Optimize one of these forms: // and i1 Op, SI / select i1 Op, i1 SI, i1 false (if IsAnd = true) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index abc88e35cf2af..96275302e86d9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1403,7 +1403,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { } /// Remove negation and try to convert division into multiplication. -static Instruction *foldFDivConstantDivisor(BinaryOperator &I) { +Instruction *InstCombinerImpl::foldFDivConstantDivisor(BinaryOperator &I) { Constant *C; if (!match(I.getOperand(1), m_Constant(C))) return nullptr; @@ -1415,6 +1415,17 @@ static Instruction *foldFDivConstantDivisor(BinaryOperator &I) { if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)) return BinaryOperator::CreateFDivFMF(X, NegC, &I); + // nnan X / +0.0 -> copysign(inf, X) + if (I.hasNoNaNs() && match(I.getOperand(1), m_Zero())) { + IRBuilder<> B(&I); + // TODO: nnan nsz X / -0.0 -> copysign(inf, X) + CallInst *CopySign = B.CreateIntrinsic( + Intrinsic::copysign, {C->getType()}, + {ConstantFP::getInfinity(I.getType()), I.getOperand(0)}, &I); + CopySign->takeName(&I); + return replaceInstUsesWith(I, CopySign); + } + // If the constant divisor has an exact inverse, this is always safe. 
If not, // then we can still create a reciprocal if fast-math-flags allow it and the // constant is a regular number (not zero, infinite, or denormal). diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 38557c326d129..dd49f8bca91da 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -950,7 +950,7 @@ define float @fdiv_zero_f32(float %x) { ; https://alive2.llvm.org/ce/z/gLBFKB define float @fdiv_nnan_zero_f32(float %x) { ; CHECK-LABEL: @fdiv_nnan_zero_f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: [[FDIV:%.*]] = call nnan float @llvm.copysign.f32(float 0x7FF0000000000000, float [[X:%.*]]) ; CHECK-NEXT: ret float [[FDIV]] ; %fdiv = fdiv nnan float %x, 0.0 @@ -959,7 +959,7 @@ define float @fdiv_nnan_zero_f32(float %x) { define <2 x float> @fdiv_nnan_zero_v2f32(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_zero_v2f32( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan <2 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan <2 x float> %x, zeroinitializer @@ -968,7 +968,7 @@ define <2 x float> @fdiv_nnan_zero_v2f32(<2 x float> %x) { define float @fdiv_nnan_zero_f32_fmf(float %x) { ; CHECK-LABEL: @fdiv_nnan_zero_f32_fmf( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz float @llvm.copysign.f32(float 0x7FF0000000000000, float [[X:%.*]]) ; CHECK-NEXT: ret float [[FDIV]] ; %fdiv = fdiv nnan nsz float %x, 0.0 @@ -977,7 +977,7 @@ define float @fdiv_nnan_zero_f32_fmf(float %x) { define <2 x float> @fdiv_nnan_zero_v2f32_fmf(<2 x float> %x) { ; CHECK-LABEL: @fdiv_nnan_zero_v2f32_fmf( -; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz <2 x float> 
@llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) ; CHECK-NEXT: ret <2 x float> [[FDIV]] ; %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer From b293de988059cac25c4ad84371be09107de7bbc4 Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Mon, 7 Nov 2022 21:59:48 -0800 Subject: [PATCH 498/516] [Docs] Add my Office Hours --- llvm/docs/GettingInvolved.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 889fb91ba2f63..50961ceabb863 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -318,6 +318,11 @@ don't find anyone present, chances are they happen to be off that day. `gcal `__ - `Google meet `__ - English, French + * - Paulo Matos (he/him) + - WebAssembly backend; LLVM IR; + - Monthly, 1st Monday of the month at 11:00am Europe/Berlin, for 30 minutes. + - `Igalia Jitsi `__ + - English, Portuguese, German Guidance for office hours hosts From ebac59999f926339f08936c050890c3364b5f130 Mon Sep 17 00:00:00 2001 From: Dmitry Makogon Date: Wed, 2 Nov 2022 19:44:22 +0700 Subject: [PATCH 499/516] [SimpleLoopUnswitch] Skip trivial selects in guards conditions unswitch candidates We do this for conditional branches, but not for guards for some reason. Fixes pr58666. 
Differential Revision: https://reviews.llvm.org/D137249 --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 3 +- ...trivial-unswitch-skip-selects-in-guards.ll | 42 ++++++++++++++++--- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index a434394f2cdac..f7ecc47947151 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2767,7 +2767,8 @@ static bool collectUnswitchCandidates( if (CollectGuards) for (auto &I : *BB) if (isGuard(&I)) { - auto *Cond = cast(&I)->getArgOperand(0); + auto *Cond = + skipTrivialSelect(cast(&I)->getArgOperand(0)); // TODO: Support AND, OR conditions and partial unswitching. if (!isa(Cond) && L.isLoopInvariant(Cond)) UnswitchCandidates.push_back({&I, {Cond}}); diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll index 7f8862e160a02..91dd181b0a98a 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s ; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s @@ -7,9 +8,40 @@ declare void @llvm.experimental.guard(i1, ...) 
declare void @widget() ; REQUIRES: asserts -; XFAIL: * define void @foo(ptr addrspace(1) %arg, i64 %arg1) personality ptr @pluto { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = icmp slt i32 poison, 570 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP]], i1 true, i1 false +; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT_US:%.*]], label [[BB_SPLIT:%.*]] +; CHECK: bb.split.us: +; CHECK-NEXT: br label [[BB3_US:%.*]] +; CHECK: bb3.us: +; CHECK-NEXT: br label [[GUARDED_US:%.*]] +; CHECK: bb4.us: +; CHECK-NEXT: invoke void @widget() +; CHECK-NEXT: to label [[BB6_US:%.*]] unwind label [[BB7_SPLIT_US:%.*]] +; CHECK: bb6.us: +; CHECK-NEXT: invoke void @widget() +; CHECK-NEXT: to label [[BB3_US]] unwind label [[BB7_SPLIT_US]] +; CHECK: guarded.us: +; CHECK-NEXT: invoke void @widget() +; CHECK-NEXT: to label [[BB4_US:%.*]] unwind label [[BB7_SPLIT_US]] +; CHECK: bb7.split.us: +; CHECK-NEXT: [[TMP8_US:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label [[BB7:%.*]] +; CHECK: bb.split: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br label [[DEOPT:%.*]] +; CHECK: deopt: +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false, i32 7) [ "deopt"() ] +; CHECK-NEXT: unreachable +; CHECK: bb7: +; CHECK-NEXT: ret void +; bb: %tmp = icmp slt i32 poison, 570 %tmp2 = select i1 %tmp, i1 true, i1 false @@ -18,19 +50,19 @@ bb: bb3: ; preds = %bb6, %bb call void (i1, ...) 
@llvm.experimental.guard(i1 %tmp2, i32 7) [ "deopt"() ] invoke void @widget() - to label %bb4 unwind label %bb7 + to label %bb4 unwind label %bb7 bb4: ; preds = %bb3 invoke void @widget() - to label %bb6 unwind label %bb7 + to label %bb6 unwind label %bb7 bb6: ; preds = %bb4 invoke void @widget() - to label %bb3 unwind label %bb7 + to label %bb3 unwind label %bb7 bb7: ; preds = %bb6, %bb4, %bb3 %tmp8 = landingpad { ptr, i32 } - cleanup + cleanup ret void } From 1c355352c73050881f68b3e2b673dc1c7919b67d Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Tue, 8 Nov 2022 14:04:10 +0800 Subject: [PATCH 500/516] [X86] Add In64BitMode predicates for LOCK_INC64m, LOCK_DEC64m These two instructions are only encodable in 64bit mode. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D137608 --- llvm/lib/Target/X86/X86InstrCompiler.td | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index ab3abe8faca7c..09e31988e5bd8 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -801,9 +801,9 @@ def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs), return hasNoCarryFlagUses(SDValue(N, 0)); }]>; -let Predicates = [UseIncDec] in { - let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, - SchedRW = [WriteALURMW] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALURMW] in { + let Predicates = [UseIncDec] in { def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>, @@ -816,10 +816,6 @@ let Predicates = [UseIncDec] in { "inc{l}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>, OpSize32, LOCK; - def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), - "inc{q}\t$dst", - [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>, - LOCK; def LOCK_DEC8m : 
I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", @@ -833,20 +829,33 @@ let Predicates = [UseIncDec] in { "dec{l}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>, OpSize32, LOCK; + } + + let Predicates = [UseIncDec, In64BitMode] in { + def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), + "inc{q}\t$dst", + [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>, + LOCK; def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>, LOCK; } +} +let Predicates = [UseIncDec] in { // Additional patterns for -1 constant. def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>; def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>; def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>; - def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>; +} + +let Predicates = [UseIncDec, In64BitMode] in { + // Additional patterns for -1 constant. + def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>; } From 70c781f4b6f4b9aa851dbf950b53569a8f8bd1e8 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 7 Nov 2022 11:33:43 +0000 Subject: [PATCH 501/516] [SIFoldOperands] Move `isFoldableCopy` into a separate helper, NFC. There was quite a bit of logic there that was just in the middle of core loop. I think it makes it easier to follow when it's split off in a separate helper like the others. 
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D137538 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 151 ++++++++++++---------- 1 file changed, 80 insertions(+), 71 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 4897f481bf3c9..36a7f8f3ad927 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -111,6 +111,8 @@ class SIFoldOperands : public MachineFunctionPass { bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const; + bool tryFoldFoldableCopy(MachineInstr &MI, + MachineOperand *&CurrentKnownM0Val) const; const MachineOperand *isClamp(const MachineInstr &MI) const; bool tryFoldClamp(MachineInstr &MI); @@ -1292,6 +1294,73 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI, return true; } +bool SIFoldOperands::tryFoldFoldableCopy( + MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const { + // Specially track simple redefs of m0 to the same value in a block, so we + // can erase the later ones. + if (MI.getOperand(0).getReg() == AMDGPU::M0) { + MachineOperand &NewM0Val = MI.getOperand(1); + if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { + MI.eraseFromParent(); + return true; + } + + // We aren't tracking other physical registers + CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) + ? nullptr + : &NewM0Val; + return false; + } + + MachineOperand &OpToFold = MI.getOperand(1); + bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal(); + + // FIXME: We could also be folding things like TargetIndexes. + if (!FoldingImm && !OpToFold.isReg()) + return false; + + if (OpToFold.isReg() && !OpToFold.getReg().isVirtual()) + return false; + + // Prevent folding operands backwards in the function. 
For example, + // the COPY opcode must not be replaced by 1 in this example: + // + // %3 = COPY %vgpr0; VGPR_32:%3 + // ... + // %vgpr0 = V_MOV_B32_e32 1, implicit %exec + if (!MI.getOperand(0).getReg().isVirtual()) + return false; + + bool Changed = foldInstOperand(MI, OpToFold); + + // If we managed to fold all uses of this copy then we might as well + // delete it now. + // The only reason we need to follow chains of copies here is that + // tryFoldRegSequence looks forward through copies before folding a + // REG_SEQUENCE into its eventual users. + auto *InstToErase = &MI; + while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { + auto &SrcOp = InstToErase->getOperand(1); + auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register(); + InstToErase->eraseFromParent(); + Changed = true; + InstToErase = nullptr; + if (!SrcReg || SrcReg.isPhysical()) + break; + InstToErase = MRI->getVRegDef(SrcReg); + if (!InstToErase || !TII->isFoldableCopy(*InstToErase)) + break; + } + + if (InstToErase && InstToErase->isRegSequence() && + MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { + InstToErase->eraseFromParent(); + Changed = true; + } + + return Changed; +} + // Clamp patterns are canonically selected to v_max_* instructions, so only // handle them. const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { @@ -1746,82 +1815,22 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { continue; } - if (!TII->isFoldableCopy(MI)) { - // Saw an unknown clobber of m0, so we no longer know what it is. - if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) - CurrentKnownM0Val = nullptr; - - // TODO: Omod might be OK if there is NSZ only on the source - // instruction, and not the omod multiply. 
- if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || - !tryFoldOMod(MI)) - Changed |= tryFoldClamp(MI); - + if (TII->isFoldableCopy(MI)) { + Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val); continue; } - // Specially track simple redefs of m0 to the same value in a block, so we - // can erase the later ones. - if (MI.getOperand(0).getReg() == AMDGPU::M0) { - MachineOperand &NewM0Val = MI.getOperand(1); - if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { - MI.eraseFromParent(); - Changed = true; - continue; - } - - // We aren't tracking other physical registers - CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ? - nullptr : &NewM0Val; - continue; - } - - MachineOperand &OpToFold = MI.getOperand(1); - bool FoldingImm = - OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal(); - - // FIXME: We could also be folding things like TargetIndexes. - if (!FoldingImm && !OpToFold.isReg()) - continue; - - if (OpToFold.isReg() && !OpToFold.getReg().isVirtual()) - continue; + // Saw an unknown clobber of m0, so we no longer know what it is. + if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) + CurrentKnownM0Val = nullptr; - // Prevent folding operands backwards in the function. For example, - // the COPY opcode must not be replaced by 1 in this example: - // - // %3 = COPY %vgpr0; VGPR_32:%3 - // ... - // %vgpr0 = V_MOV_B32_e32 1, implicit %exec - if (!MI.getOperand(0).getReg().isVirtual()) - continue; - - Changed |= foldInstOperand(MI, OpToFold); - - // If we managed to fold all uses of this copy then we might as well - // delete it now. - // The only reason we need to follow chains of copies here is that - // tryFoldRegSequence looks forward through copies before folding a - // REG_SEQUENCE into its eventual users. - auto *InstToErase = &MI; - while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { - auto &SrcOp = InstToErase->getOperand(1); - auto SrcReg = SrcOp.isReg() ? 
SrcOp.getReg() : Register(); - InstToErase->eraseFromParent(); - Changed = true; - InstToErase = nullptr; - if (!SrcReg || SrcReg.isPhysical()) - break; - InstToErase = MRI->getVRegDef(SrcReg); - if (!InstToErase || !TII->isFoldableCopy(*InstToErase)) - break; - } - if (InstToErase && InstToErase->isRegSequence() && - MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { - InstToErase->eraseFromParent(); - Changed = true; - } + // TODO: Omod might be OK if there is NSZ only on the source + // instruction, and not the omod multiply. + if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || + !tryFoldOMod(MI)) + Changed |= tryFoldClamp(MI); } } + return Changed; } From 13f83365cdb5bb752066ee8f4149ae24dfe46cf1 Mon Sep 17 00:00:00 2001 From: haoyuintel Date: Tue, 8 Nov 2022 14:51:14 +0800 Subject: [PATCH 502/516] [Driver] Add -fsample-profile-use-profi This patch enable `-sample-profile-use-profi` in Clang frontend as user-facing feature. By using this patch, we can use the cflag of `-fsample-profile-use-profi` instead of `-mllvm -sample-profile-use-profi`. Reviewed By: hans, MaskRay Differential Revision: https://reviews.llvm.org/D136846 --- clang/docs/UsersManual.rst | 9 +++++++++ clang/include/clang/Driver/Options.td | 7 +++++++ clang/lib/Driver/ToolChains/Clang.cpp | 6 ++++++ clang/test/Driver/pgo-sample-use-profi.c | 4 ++++ 4 files changed, 26 insertions(+) create mode 100644 clang/test/Driver/pgo-sample-use-profi.c diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 11bc5c9066111..9b03db9e0f742 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2241,6 +2241,15 @@ usual build cycle when using sample profilers for optimization: $ clang++ -O2 -gline-tables-only -fprofile-sample-use=code.prof code.cc -o code + [OPTIONAL] Sampling-based profiles can have inaccuracies or missing block/ + edge counters. 
The profile inference algorithm (profi) can be used to infer + missing blocks and edge counts, and improve the quality of profile data. + Enable it with ``-fsample-profile-use-profi``. + + .. code-block:: console + + $ clang++ -O2 -gline-tables-only -fprofile-sample-use=code.prof \ + -fsample-profile-use-profi code.cc -o code Sample Profile Formats """""""""""""""""""""" diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 608840b2d3691..ca16bd9765598 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1251,6 +1251,13 @@ def fprofile_sample_accurate : Flag<["-"], "fprofile-sample-accurate">, as cold. Otherwise, treat callsites without profile samples as if we have no profile}]>, MarshallingInfoFlag>; +def fsample_profile_use_profi : Flag<["-"], "fsample-profile-use-profi">, + Flags<[NoXarchOption, CC1Option]>, Group, + HelpText<"Use profi to infer block and edge counts">, + DocBrief<[{Infer block and edge counts. 
If the profiles have errors or missing + blocks caused by sampling, profile inference (profi) can convert + basic block counts to branch probabilites to fix them by extended + and re-engineered classic MCMF (min-cost max-flow) approach.}]>; def fno_profile_sample_accurate : Flag<["-"], "fno-profile-sample-accurate">, Group, Flags<[NoXarchOption]>; def fauto_profile : Flag<["-"], "fauto-profile">, Group, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bffc8dc611605..4e404c579a57f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5729,6 +5729,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); + if (getLastProfileSampleUseArg(Args) && + Args.hasArg(options::OPT_fsample_profile_use_profi)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sample-profile-use-profi"); + } + // Add runtime flag for PS4/PS5 when PGO, coverage, or sanitizers are enabled. if (RawTriple.isPS() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { diff --git a/clang/test/Driver/pgo-sample-use-profi.c b/clang/test/Driver/pgo-sample-use-profi.c new file mode 100644 index 0000000000000..454a511a06281 --- /dev/null +++ b/clang/test/Driver/pgo-sample-use-profi.c @@ -0,0 +1,4 @@ +/// Test if profi flat is enabled in frontend as user-facing feature. +// RUN: %clang -c -fsample-profile-use-profi -fprofile-sample-use=/dev/null -### %s 2>&1 | FileCheck %s + +// CHECK: "-mllvm" "-sample-profile-use-profi" From b5f9972345f0305d6e71cc3cddbb1da65fd298d5 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 7 Nov 2022 11:32:44 +0000 Subject: [PATCH 503/516] [SIFoldOperands] Small code cleanups, NFC. I've been trying to understand the backend better and decided to read the code of this pass. While doing so, I noticed parts that could be refactored to be a tiny bit clearer. 
I tried to keep the changes minimal, a non-exhaustive list of changes is: - Stylistic changes to better fit LLVM's coding style - Removing dead/useless functions (e.g. FoldCandidate had getters, but it's a public struct!) - Saving regs/opcodes in variables if they're going to be used multiple times in the same condition Reviewed By: arsenm, foad Differential Revision: https://reviews.llvm.org/D137539 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 144 +++++++++------------- 1 file changed, 59 insertions(+), 85 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 36a7f8f3ad927..310565d96fea6 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -62,17 +62,7 @@ struct FoldCandidate { bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; } - bool isCommuted() const { - return Commuted; - } - - bool needsShrink() const { - return ShrinkOpcode != -1; - } - - int getShrinkOpcode() const { - return ShrinkOpcode; - } + bool needsShrink() const { return ShrinkOpcode != -1; } }; class SIFoldOperands : public MachineFunctionPass { @@ -175,19 +165,17 @@ bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo, if (!OpToFold.isFI()) return false; + const unsigned Opc = UseMI.getOpcode(); if (TII->isMUBUF(UseMI)) - return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), - AMDGPU::OpName::vaddr); + return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); if (!TII->isFLATScratch(UseMI)) return false; - int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), - AMDGPU::OpName::saddr); + int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr); if (OpNo == SIdx) return true; - int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), - AMDGPU::OpName::vaddr); + int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); return OpNo == VIdx && SIdx == -1; } @@ -200,11 +188,11 @@ bool 
SIFoldOperands::updateOperand(FoldCandidate &Fold) const { MachineOperand &Old = MI->getOperand(Fold.UseOpNo); assert(Old.isReg()); + + const uint64_t TSFlags = MI->getDesc().TSFlags; if (Fold.isImm()) { - if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked && - !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) && - (!ST->hasDOTOpSelHazard() || - !(MI->getDesc().TSFlags & SIInstrFlags::IsDOT)) && + if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) && + (!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) && AMDGPU::isFoldableLiteralV216(Fold.ImmToFold, ST->hasInv2PiInlineImm())) { // Set op_sel/op_sel_hi on this operand or bail out if op_sel is @@ -258,7 +246,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { return false; } - int Op32 = Fold.getShrinkOpcode(); + int Op32 = Fold.ShrinkOpcode; MachineOperand &Dst0 = MI->getOperand(0); MachineOperand &Dst1 = MI->getOperand(1); assert(Dst0.isDef() && Dst1.isDef()); @@ -287,7 +275,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { MI->removeOperand(I); MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF)); - if (Fold.isCommuted()) + if (Fold.Commuted) TII->commuteInstruction(*Inst32, false); return true; } @@ -325,11 +313,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { static bool isUseMIInFoldList(ArrayRef FoldList, const MachineInstr *MI) { - for (auto Candidate : FoldList) { - if (Candidate.UseMI == MI) - return true; - } - return false; + return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; }); } static void appendFoldCandidate(SmallVectorImpl &FoldList, @@ -488,7 +472,6 @@ bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI, } return true; - //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg()); } // Find a def of the UseReg, check if it is a reg_sequence and find initializers @@ -608,10 +591,9 @@ void SIFoldOperands::foldOperand( return; // FIXME: Fold operands with subregs. 
- if (UseOp.isReg() && OpToFold.isReg()) { - if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister) - return; - } + if (UseOp.isReg() && OpToFold.isReg() && + (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)) + return; // Special case for REG_SEQUENCE: We can't fold literals into // REG_SEQUENCE instructions, so we have to fold them into the @@ -661,12 +643,11 @@ void SIFoldOperands::foldOperand( // safe to fold the addressing mode, even pre-GFX9. UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex()); + const unsigned Opc = UseMI->getOpcode(); if (TII->isFLATScratch(*UseMI) && - AMDGPU::getNamedOperandIdx(UseMI->getOpcode(), - AMDGPU::OpName::vaddr) != -1 && - AMDGPU::getNamedOperandIdx(UseMI->getOpcode(), - AMDGPU::OpName::saddr) == -1) { - unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode()); + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1 && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr) == -1) { + unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc); UseMI->setDesc(TII->get(NewOpc)); } @@ -702,8 +683,10 @@ void SIFoldOperands::foldOperand( Use.getParent()->getOperandNo(&Use), &UseMI->getOperand(1)); } + for (auto &F : CopyUses) { - foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace); + foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, + CopiesToReplace); } } @@ -828,15 +811,15 @@ void SIFoldOperands::foldOperand( if (Size != 4) return; - if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) && - TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg())) + + Register Reg0 = UseMI->getOperand(0).getReg(); + Register Reg1 = UseMI->getOperand(1).getReg(); + if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1)) UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64)); - else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) && - TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg())) + else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1)) 
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64)); - else if (ST->hasGFX90AInsts() && - TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) && - TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg())) + else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) && + TRI->isAGPR(*MRI, Reg1)) UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32)); return; } @@ -1020,10 +1003,12 @@ static unsigned getMovOpc(bool IsScalar) { return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; } -/// Remove any leftover implicit operands from mutating the instruction. e.g. -/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def -/// anymore. -static void stripExtraCopyOperands(MachineInstr &MI) { +static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { + MI.setDesc(NewDesc); + + // Remove any leftover implicit operands from mutating the instruction. e.g. + // if we replace an s_and_b32 with a copy, we don't need the implicit scc def + // anymore. const MCInstrDesc &Desc = MI.getDesc(); unsigned NumOps = Desc.getNumOperands() + Desc.getNumImplicitUses() + @@ -1033,24 +1018,18 @@ static void stripExtraCopyOperands(MachineInstr &MI) { MI.removeOperand(I); } -static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { - MI.setDesc(NewDesc); - stripExtraCopyOperands(MI); -} - MachineOperand * SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const { - if (Op.isReg()) { - // If this has a subregister, it obviously is a register source. - if (Op.getSubReg() != AMDGPU::NoSubRegister || !Op.getReg().isVirtual()) - return &Op; - - MachineInstr *Def = MRI->getVRegDef(Op.getReg()); - if (Def && Def->isMoveImmediate()) { - MachineOperand &ImmSrc = Def->getOperand(1); - if (ImmSrc.isImm()) - return &ImmSrc; - } + // If this has a subregister, it obviously is a register source. 
+ if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister || + !Op.getReg().isVirtual()) + return &Op; + + MachineInstr *Def = MRI->getVRegDef(Op.getReg()); + if (Def && Def->isMoveImmediate()) { + MachineOperand &ImmSrc = Def->getOperand(1); + if (ImmSrc.isImm()) + return &ImmSrc; } return &Op; @@ -1127,9 +1106,8 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const { return true; } - if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 || - MI->getOpcode() == AMDGPU::V_AND_B32_e32 || - MI->getOpcode() == AMDGPU::S_AND_B32) { + if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 || + Opc == AMDGPU::S_AND_B32) { if (Src1Val == 0) { // y = and x, 0 => y = v_mov_b32 0 MI->removeOperand(Src0Idx); @@ -1138,16 +1116,14 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const { // y = and x, -1 => y = copy x MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); - stripExtraCopyOperands(*MI); } else return false; return true; } - if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 || - MI->getOpcode() == AMDGPU::V_XOR_B32_e32 || - MI->getOpcode() == AMDGPU::S_XOR_B32) { + if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 || + Opc == AMDGPU::S_XOR_B32) { if (Src1Val == 0) { // y = xor x, 0 => y = copy x MI->removeOperand(Src1Idx); @@ -1210,14 +1186,13 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const { Register Src1 = MI.getOperand(2).getReg(); MachineInstr *SrcDef = MRI->getVRegDef(Src1); - if (ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) { - Register Dst = MI.getOperand(0).getReg(); - MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg()); - MI.eraseFromParent(); - return true; - } + if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) + return false; - return false; + Register Dst = MI.getOperand(0).getReg(); + MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg()); + MI.eraseFromParent(); + return true; } bool SIFoldOperands::foldInstOperand(MachineInstr &MI, @@ -1286,7 +1261,7 @@ bool 
SIFoldOperands::foldInstOperand(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << static_cast(Fold.UseOpNo) << " of " << *Fold.UseMI); - } else if (Fold.isCommuted()) { + } else if (Fold.Commuted) { // Restoring instruction's original operand order if fold has failed. TII->commuteInstruction(*Fold.UseMI, false); } @@ -1735,9 +1710,9 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) { SmallVector Users; SmallVector MoveRegs; - for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) { + for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) Users.push_back(&I); - } + if (Users.empty()) return false; @@ -1750,9 +1725,8 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) { if (TRI->isAGPR(*MRI, DstReg)) continue; MoveRegs.push_back(DstReg); - for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) { + for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) Users.push_back(&U); - } } const TargetRegisterClass *RC = MRI->getRegClass(DefReg); From 7425077e31c9b505103a98299a728bc496bd933c Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 7 Nov 2022 11:54:18 +0000 Subject: [PATCH 504/516] [AMDGPU] Add & use `hasNamedOperand`, NFC In a lot of places, we were just calling `getNamedOperandIdx` to check if the result was != or == to -1. This is fine in itself, but it's verbose and doesn't make the intention clear, IMHO. I added a `hasNamedOperand` and replaced all cases I could find with regexes and manually. 
Reviewed By: arsenm, foad Differential Revision: https://reviews.llvm.org/D137540 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 100 +++++++++--------- .../Disassembler/AMDGPUDisassembler.cpp | 31 +++--- llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 24 ++--- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 3 +- .../AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 12 +-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 4 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 6 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 16 ++- .../Target/AMDGPU/SILoadStoreOptimizer.cpp | 6 +- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 30 +++--- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 7 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5 + 14 files changed, 116 insertions(+), 132 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index c32c56b1c8f32..c1eb61f2f4ac2 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3507,7 +3507,7 @@ bool AMDGPUAsmParser::validateConstantBusLimitations( return true; // Check special imm operands (used by madmk, etc) - if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { + if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { ++NumLiterals; LiteralSize = 4; } @@ -7897,7 +7897,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); if (IsGFX10Plus) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::tfe)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyLWE); if (!IsGFX10Plus) @@ -8205,9 +8205,9 @@ void cvtVOP3DstOpSelOnly(MCInst &Inst) { const int Ops[] = { AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2 }; - for (SrcNum = 0; - SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; - ++SrcNum); + for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); + ++SrcNum) + ; assert(SrcNum > 0); unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); @@ -8268,17 +8268,17 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) } } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyHigh); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyClampSI); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOModSI); } void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) @@ -8351,7 +8351,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) { // This instruction has src modifiers for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); @@ -8377,13 +8377,13 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, } } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyClampSI); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOModSI); // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): // it has src2 register operand that is tied to dst operand @@ -8427,7 +8427,7 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, Inst.addOperand(Inst.getOperand(0)); } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { assert(!IsPacked); Inst.addOperand(Inst.getOperand(0)); } @@ -8885,7 +8885,7 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, unsigned Opc = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); bool HasModifiers = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers); // MAC instructions are special because they have 'old' // operand which is not tied to dst (but assumed to be). 
@@ -8943,17 +8943,17 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, llvm_unreachable("unhandled operand type"); } } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); - } + if (Desc.TSFlags & SIInstrFlags::VOP3P) cvtVOP3P(Inst, Operands, OptionalIdx); else if (Desc.TSFlags & SIInstrFlags::VOP3) cvtVOP3OpSel(Inst, Operands, OptionalIdx); - else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); } @@ -8966,9 +8966,10 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); - } + + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyDppFi); } } @@ -8977,7 +8978,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I unsigned Opc = Inst.getOpcode(); bool HasModifiers = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers); unsigned I = 1; const MCInstrDesc &Desc = 
MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { @@ -9038,7 +9039,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); } } @@ -9180,41 +9181,38 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, SkippedVcc = false; } - if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && - Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && - Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + const unsigned Opc = Inst.getOpcode(); + if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && + Opc != AMDGPU::V_NOP_sdwa_vi) { // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::clamp) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::omod) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); - } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::dst_sel) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); - } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - 
AMDGPU::OpName::dst_unused) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); - } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOP2: addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); - } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); @@ -9222,7 +9220,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 43a1dfc7f561b..3969e8cf451c5 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -740,7 +740,7 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { DecodeStatus 
AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { - if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1) + if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst)) // VOPC - insert clamp insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { @@ -804,7 +804,7 @@ bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const { if (OldIdx != -1 && Desc.getOperandConstraint( OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) { - assert(AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2) != -1); + assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2)); assert(Desc.getOperandConstraint( AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2), MCOI::OperandConstraint::TIED_TO) == DST_IDX); @@ -838,19 +838,19 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { auto Mods = collectVOPModifiers(MI); insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); } else { // Insert dummy unused src modifiers. 
if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src0_modifiers); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src1_modifiers); } @@ -865,7 +865,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { auto Mods = collectVOPModifiers(MI); insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); @@ -900,9 +900,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { assert(VDataIdx != -1); if (BaseOpcode->BVH) { // Add A16 operand for intersect_ray instructions - if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) { + if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16)) addOperand(MI, MCOperand::createImm(1)); - } return MCDisassembler::Success; } @@ -1020,23 +1019,23 @@ DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { auto Mods = collectVOPModifiers(MI, true); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), 
AMDGPU::OpName::op_sel); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi), AMDGPU::OpName::op_sel_hi); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo), AMDGPU::OpName::neg_lo); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi), AMDGPU::OpName::neg_hi); @@ -1049,16 +1048,16 @@ DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const { unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old)) insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src0_modifiers); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src1_modifiers); return MCDisassembler::Success; diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index de245ef57def7..be4b477547ad3 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -272,8 +272,7 @@ MachineInstr 
*GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; - } else if (AMDGPU::getNamedOperandIdx(DPPOp, - AMDGPU::OpName::src0_modifiers) != -1) { + } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) { DPPInst.addImm(0); ++NumOperands; } @@ -296,8 +295,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; - } else if (AMDGPU::getNamedOperandIdx(DPPOp, - AMDGPU::OpName::src1_modifiers) != -1) { + } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) { DPPInst.addImm(0); ++NumOperands; } @@ -333,18 +331,16 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, } if (HasVOP3DPP) { auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp); - if (ClampOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) { + if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) { DPPInst.addImm(ClampOpr->getImm()); } auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in); if (VdstInOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) { + AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) { DPPInst.add(*VdstInOpr); } auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod); - if (OmodOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) { + if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) { DPPInst.addImm(OmodOpr->getImm()); } // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to @@ -357,7 +353,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, Fail = true; break; } - if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1) + if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel)) DPPInst.addImm(OpSel); } if 
(auto *OpSelHiOpr = @@ -371,17 +367,15 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, Fail = true; break; } - if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1) + if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi)) DPPInst.addImm(OpSelHi); } auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo); - if (NegOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) { + if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) { DPPInst.addImm(NegOpr->getImm()); } auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi); - if (NegHiOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) { + if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) { DPPInst.addImm(NegHiOpr->getImm()); } } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 81013db1f0034..cb1d5a6fdf003 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -919,8 +919,7 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { if (DstSel->getImm() == AMDGPU::SDWA::DWORD) return false; } else { - if ((AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::op_sel) == -1) || + if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) || !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers) ->getImm() & SISrcMods::DST_OP_SEL)) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 1ba05e765bc31..aa55ba5c1e291 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -297,12 +297,12 @@ uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const { using namespace AMDGPU::VOP3PEncoding; using namespace AMDGPU::OpName; - if (AMDGPU::getNamedOperandIdx(Opcode, 
op_sel_hi) != -1) { - if (AMDGPU::getNamedOperandIdx(Opcode, src2) != -1) + if (AMDGPU::hasNamedOperand(Opcode, op_sel_hi)) { + if (AMDGPU::hasNamedOperand(Opcode, src2)) return 0; - if (AMDGPU::getNamedOperandIdx(Opcode, src1) != -1) + if (AMDGPU::hasNamedOperand(Opcode, src1)) return OP_SEL_HI_2; - if (AMDGPU::getNamedOperandIdx(Opcode, src0) != -1) + if (AMDGPU::hasNamedOperand(Opcode, src0)) return OP_SEL_HI_1 | OP_SEL_HI_2; } return OP_SEL_HI_0 | OP_SEL_HI_1 | OP_SEL_HI_2; @@ -369,9 +369,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, return; // Do not print literals from SISrc Operands for insts with mandatory literals - int ImmLitIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm); - if (ImmLitIdx != -1) + if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm)) return; // Check for additional literals diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 310565d96fea6..11108f6a999d4 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -645,8 +645,8 @@ void SIFoldOperands::foldOperand( const unsigned Opc = UseMI->getOpcode(); if (TII->isFLATScratch(*UseMI) && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1 && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr) == -1) { + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) && + !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::saddr)) { unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc); UseMI->setDesc(TII->get(NewOpc)); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 64ebf1d2d8b60..b6243c986bd18 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -11871,7 +11871,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() && !TII->isGather4(Opcode) && 
- AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) != -1) { + AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::dmask)) { return adjustWritemask(Node, DAG); } diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 4314595684710..a12fb3abdfedc 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -564,15 +564,13 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, } if (Inst.mayStore()) { - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::data0) != -1) { + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data0)) { setExpScore( &Inst, TII, TRI, MRI, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), CurrScore); } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::data1) != -1) { + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data1)) { setExpScore(&Inst, TII, TRI, MRI, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data1), diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 775da4759358f..234e6c3e796aa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -213,8 +213,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, if (isSMRD(Opc0) && isSMRD(Opc1)) { // Skip time and cache invalidation instructions. - if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || - AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) + if (!AMDGPU::hasNamedOperand(Opc0, AMDGPU::OpName::sbase) || + !AMDGPU::hasNamedOperand(Opc1, AMDGPU::OpName::sbase)) return false; unsigned NumOps = getNumOperandsNoGlue(Load0); @@ -3797,8 +3797,7 @@ bool SIInstrInfo::hasModifiers(unsigned Opcode) const { // The src0_modifier operand is present on all instructions // that have modifiers. 
- return AMDGPU::getNamedOperandIdx(Opcode, - AMDGPU::OpName::src0_modifiers) != -1; + return AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers); } bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, @@ -3891,10 +3890,10 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, // Add the dst operand if the 32-bit encoding also has an explicit $vdst. // For VOPC instructions, this is replaced by an implicit def of vcc. - if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst) != -1) { + if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::vdst)) { // dst Inst32.add(MI.getOperand(0)); - } else if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::sdst) != -1) { + } else if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::sdst)) { // VOPCX instructions won't be writing to an explicit dst, so this should // not fail for these instructions. assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) || @@ -4852,9 +4851,8 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); if (DataIdx != -1) { - IsAllocatable = VDstIdx != -1 || - AMDGPU::getNamedOperandIdx(TID.Opcode, - AMDGPU::OpName::data1) != -1; + IsAllocatable = VDstIdx != -1 || AMDGPU::hasNamedOperand( + TID.Opcode, AMDGPU::OpName::data1); } } return adjustAllocatableRegClass(ST, RI, MF.getRegInfo(), TID, RegClass, diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 79f2826aa5cec..0eefce86f60ab 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -412,8 +412,8 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { } if (TII.isMIMG(Opc)) { // Ignore instructions encoded without vaddr. 
- if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1 && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) == -1) + if (!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) && + !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr0)) return UNKNOWN; // Ignore BVH instructions if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) @@ -1385,7 +1385,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( New.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)); // For convenience, when SGPR_IMM buffer loads are merged into a // zero-offset load, we generate its SGPR variant. - if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset) != -1) + if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::offset)) New.addImm(MergedOffset); New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index e768a2f3e1a5d..b21dbb7626e6a 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1002,24 +1002,21 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Copy dst, if it is present in original then should also be present in SDWA MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::vdst)); SDWAInst.add(*Dst); } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) { - assert(Dst && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + assert(Dst && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst)); SDWAInst.add(*Dst); } else { - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst)); SDWAInst.addReg(TRI->getVCC(), RegState::Define); } // Copy src0, initialize src0_modifiers. 
All sdwa instructions has src0 and // src0_modifiers (except for v_nop_sdwa, but it can't get here) MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); - assert( - Src0 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); + assert(Src0 && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0) && + AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_modifiers)); if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) SDWAInst.addImm(Mod->getImm()); else @@ -1029,9 +1026,8 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Copy src1 if present, initialize src1_modifiers. MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (Src1) { - assert( - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1) && + AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_modifiers)); if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) SDWAInst.addImm(Mod->getImm()); else @@ -1050,7 +1046,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy clamp if present, initialize otherwise - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::clamp)); MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp); if (Clamp) { SDWAInst.add(*Clamp); @@ -1059,7 +1055,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy omod if present, initialize otherwise if needed - if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { + if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::omod)) { MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); if (OMod) { SDWAInst.add(*OMod); @@ 
-1069,7 +1065,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy dst_sel if present, initialize otherwise if needed - if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) { + if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_sel)) { MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); if (DstSel) { SDWAInst.add(*DstSel); @@ -1079,7 +1075,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy dst_unused if present, initialize otherwise if needed - if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) { + if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_unused)) { MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); if (DstUnused) { SDWAInst.add(*DstUnused); @@ -1089,7 +1085,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy src0_sel if present, initialize otherwise - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_sel)); MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); if (Src0Sel) { SDWAInst.add(*Src0Sel); @@ -1099,7 +1095,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Copy src1_sel if present, initialize otherwise if needed if (Src1) { - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_sel)); MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); if (Src1Sel) { SDWAInst.add(*Src1Sel); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 99292129257f2..bb656329b3d41 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1235,10 +1235,9 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize) { 
bool IsStore = TII->get(LoadStoreOp).mayStore(); - bool HasVAddr = AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) != -1; + bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr); bool UseST = - !HasVAddr && - AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0; + !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr); switch (EltSize) { case 4: @@ -2140,7 +2139,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (!Offset) { unsigned Opc = MI->getOpcode(); int NewOpc = -1; - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) { NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc); } else if (ST.hasFlatScratchSTMode()) { // On GFX10 we have ST mode to use no registers for an address. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index dda515595e4d9..80e4dada6b36a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -430,7 +430,7 @@ unsigned getVOPDOpcode(unsigned Opc) { } bool isVOPD(unsigned Opc) { - return AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0X) != -1; + return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X); } bool isTrue16Inst(unsigned Opc) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index a8642a0d1da85..778987cb03e76 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -299,6 +299,11 @@ unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +LLVM_READONLY +inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) { + return getNamedOperandIdx(Opcode, NamedIdx) != -1; +} + LLVM_READONLY int getSOPPWithRelaxation(uint16_t 
Opcode); From d1f90b61292045d395ba5b94bc9098f4b5d4e55d Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Tue, 8 Nov 2022 15:53:08 +0800 Subject: [PATCH 505/516] [NFC] [C++20] [Modules] Rename ASTWriter::isWritingStdCXXNamedModules According to the discussion in https://discourse.llvm.org/t/rfc-unifying-the-terminology-about-modules-in-clang/66054, this patch renames ASTWriter::isWritingNamedModules to ASTWriter::isWritingStdCXXNamedModules to make the name more clear. --- clang/include/clang/Basic/Module.h | 3 ++- clang/include/clang/Serialization/ASTWriter.h | 2 +- clang/lib/Serialization/ASTWriter.cpp | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 3e9669ced1009..c41ae41737898 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -159,7 +159,8 @@ class alignas(8) Module { /// eventually be exposed, for use in "private" modules. std::string ExportAsModule; - /// Does this Module scope describe part of the purview of a named C++ module? + /// Does this Module scope describe part of the purview of a standard named + /// C++ module? 
bool isModulePurview() const { return Kind == ModuleInterfaceUnit || Kind == ModulePartitionInterface || Kind == ModulePartitionImplementation || diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index cb929fc19bd21..09ee1744e8945 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -736,7 +736,7 @@ class ASTWriter : public ASTDeserializationListener, bool hasChain() const { return Chain; } ASTReader *getChain() const { return Chain; } - bool isWritingNamedModules() const { + bool isWritingStdCXXNamedModules() const { return WritingModule && WritingModule->isModulePurview(); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3658d69908c3c..1dcb5426b314d 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4334,7 +4334,8 @@ void ASTRecordWriter::AddAttr(const Attr *A) { // FIXME: Clang can't handle the serialization/deserialization of // preferred_name properly now. See // https://github.com/llvm/llvm-project/issues/56490 for example. - if (!A || (isa(A) && Writer->isWritingNamedModules())) + if (!A || (isa(A) && + Writer->isWritingStdCXXNamedModules())) return Record.push_back(0); Record.push_back(A->getKind() + 1); // FIXME: stable encoding, target attrs From 767999fca84819c74166bc6768806ed72f00e601 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Wed, 28 Sep 2022 07:47:35 +0000 Subject: [PATCH 506/516] [AMDGPU][GlobalISel] Support mad/fma_mix selection Adds support for selecting the following instructions using GlobalISel: - v_mad_mix/v_fma_mix - v_mad_mixhi/v_fma_mixhi - v_mad_mixlo/v_fma_mixlo To select those instructions properly, some additional changes were needed which impacted other tests as well. 
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D134354 --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 190 ++ .../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 + llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 4 +- .../GlobalISel/combine-fma-add-ext-fma.ll | 214 +- .../GlobalISel/combine-fma-add-ext-mul.ll | 87 +- .../GlobalISel/combine-fma-sub-ext-mul.ll | 50 +- .../GlobalISel/combine-fma-sub-ext-neg-mul.ll | 114 +- llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll | 556 ++-- llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll | 2372 +++++++++++------ llvm/test/CodeGen/AMDGPU/mad-mix.ll | 1496 +++++++---- 11 files changed, 3375 insertions(+), 1717 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index aa36045491701..c2b084bc0779d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -153,6 +153,10 @@ def gi_smrd_buffer_sgpr_imm : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3_mad_mix_mods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // Separate load nodes are defined to glue m0 initialization in // SelectionDAG. The GISel selector can just insert m0 initialization // directly before selecting a glue-less load, so hide this diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 0a68966935105..2538d175dde2a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -522,6 +522,60 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { return true; } +bool AMDGPUInstructionSelector::selectG_FMA_FMAD(MachineInstr &I) const { + assert(I.getOpcode() == AMDGPU::G_FMA || I.getOpcode() == AMDGPU::G_FMAD); + + // Try to manually select MAD_MIX/FMA_MIX. 
+ Register Dst = I.getOperand(0).getReg(); + LLT ResultTy = MRI->getType(Dst); + bool IsFMA = I.getOpcode() == AMDGPU::G_FMA; + if (ResultTy != LLT::scalar(32) || + (IsFMA ? !Subtarget->hasFmaMixInsts() : !Subtarget->hasMadMixInsts())) + return false; + + // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand + // using the conversion from f16. + bool MatchedSrc0, MatchedSrc1, MatchedSrc2; + auto [Src0, Src0Mods] = + selectVOP3PMadMixModsImpl(I.getOperand(1), MatchedSrc0); + auto [Src1, Src1Mods] = + selectVOP3PMadMixModsImpl(I.getOperand(2), MatchedSrc1); + auto [Src2, Src2Mods] = + selectVOP3PMadMixModsImpl(I.getOperand(3), MatchedSrc2); + +#ifndef NDEBUG + const SIMachineFunctionInfo *MFI = + I.getMF()->getInfo(); + AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode(); + assert((IsFMA || !Mode.allFP32Denormals()) && + "fmad selected with denormals enabled"); +#endif + + // TODO: We can select this with f32 denormals enabled if all the sources are + // converted from f16 (in which case fmad isn't legal). + if (!MatchedSrc0 && !MatchedSrc1 && !MatchedSrc2) + return false; + + const unsigned OpC = IsFMA ? 
AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32; + MachineInstr *MixInst = + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpC), Dst) + .addImm(Src0Mods) + .addReg(Src0) + .addImm(Src1Mods) + .addReg(Src1) + .addImm(Src2Mods) + .addReg(Src2) + .addImm(0) + .addImm(0) + .addImm(0); + + if (!constrainSelectedInstRegOperands(*MixInst, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { MachineBasicBlock *BB = MI.getParent(); Register DstReg = MI.getOperand(0).getReg(); @@ -3228,6 +3282,11 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { return selectG_FABS(I); case TargetOpcode::G_EXTRACT: return selectG_EXTRACT(I); + case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: + if (selectG_FMA_FMAD(I)) + return true; + return selectImpl(I, *CoverageInfo); case TargetOpcode::G_MERGE_VALUES: case TargetOpcode::G_CONCAT_VECTORS: return selectG_MERGE_VALUES(I); @@ -4679,6 +4738,137 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const { [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}}; } +// Variant of stripBitCast that returns the instruction instead of a +// MachineOperand. +static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) { + if (MI->getOpcode() == AMDGPU::G_BITCAST) + return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI); + return MI; +} + +// Figure out if this is really an extract of the high 16-bits of a dword, +// returns nullptr if it isn't. 
+static MachineInstr *isExtractHiElt(MachineInstr *Inst, + MachineRegisterInfo &MRI) { + Inst = stripBitCast(Inst, MRI); + + if (Inst->getOpcode() != AMDGPU::G_TRUNC) + return nullptr; + + MachineInstr *TruncOp = + getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI); + TruncOp = stripBitCast(TruncOp, MRI); + + // G_LSHR x, (G_CONSTANT i32 16) + if (TruncOp->getOpcode() == AMDGPU::G_LSHR) { + auto SrlAmount = getIConstantVRegValWithLookThrough( + TruncOp->getOperand(2).getReg(), MRI); + if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) { + MachineInstr *SrlOp = + getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI); + return stripBitCast(SrlOp, MRI); + } + } + + // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0) + // 1, 0 swaps the low/high 16 bits. + // 1, 1 sets the high 16 bits to be the same as the low 16. + // in any case, it selects the high elts. + if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) { + assert(MRI.getType(TruncOp->getOperand(0).getReg()) == + LLT::fixed_vector(2, 16)); + + ArrayRef Mask = TruncOp->getOperand(3).getShuffleMask(); + assert(Mask.size() == 2); + + if (Mask[0] == 1 && Mask[1] <= 1) { + MachineInstr *LHS = + getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI); + return stripBitCast(LHS, MRI); + } + } + + return nullptr; +} + +std::pair +AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root, + bool &Matched) const { + Matched = false; + + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + + MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); + if (MI->getOpcode() == AMDGPU::G_FPEXT) { + MachineOperand *MO = &MI->getOperand(1); + Src = MO->getReg(); + MI = getDefIgnoringCopies(Src, *MRI); + + assert(MRI->getType(Src) == LLT::scalar(16)); + + // See through bitcasts. + // FIXME: Would be nice to use stripBitCast here. 
+ if (MI->getOpcode() == AMDGPU::G_BITCAST) { + MO = &MI->getOperand(1); + Src = MO->getReg(); + MI = getDefIgnoringCopies(Src, *MRI); + } + + const auto CheckAbsNeg = [&]() { + // Be careful about folding modifiers if we already have an abs. fneg is + // applied last, so we don't want to apply an earlier fneg. + if ((Mods & SISrcMods::ABS) == 0) { + unsigned ModsTmp; + std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO); + MI = getDefIgnoringCopies(Src, *MRI); + + if ((ModsTmp & SISrcMods::NEG) != 0) + Mods ^= SISrcMods::NEG; + + if ((ModsTmp & SISrcMods::ABS) != 0) + Mods |= SISrcMods::ABS; + } + }; + + CheckAbsNeg(); + + // op_sel/op_sel_hi decide the source type and source. + // If the source's op_sel_hi is set, it indicates to do a conversion from + // fp16. If the sources's op_sel is set, it picks the high half of the + // source register. + + Mods |= SISrcMods::OP_SEL_1; + + if (MachineInstr *ExtractHiEltMI = isExtractHiElt(MI, *MRI)) { + Mods |= SISrcMods::OP_SEL_0; + MI = ExtractHiEltMI; + MO = &MI->getOperand(0); + Src = MO->getReg(); + + CheckAbsNeg(); + } + + Matched = true; + } + + return {Src, Mods}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const { + Register Src; + unsigned Mods; + bool Matched; + std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods + }}; +} + void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index e444370fdd070..f48976953fdd5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -97,6 +97,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool 
selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const; bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const; bool selectG_EXTRACT(MachineInstr &I) const; + bool selectG_FMA_FMAD(MachineInstr &I) const; bool selectG_MERGE_VALUES(MachineInstr &I) const; bool selectG_UNMERGE_VALUES(MachineInstr &I) const; bool selectG_BUILD_VECTOR(MachineInstr &I) const; @@ -293,6 +294,10 @@ class AMDGPUInstructionSelector final : public InstructionSelector { ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const; ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const; + std::pair selectVOP3PMadMixModsImpl(MachineOperand &Root, + bool &Matched) const; + ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const; + void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx = -1) const; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 679f6db7453fd..78c4455fc9848 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -168,7 +168,7 @@ multiclass MadFmaMixPats; def : GCNPat < @@ -181,7 +181,7 @@ multiclass MadFmaMixPats; def : GCNPat < diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll index ec96e2f26d675..cc459814e62ca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll @@ -8,9 +8,7 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX9-DENORM-NEXT: v_mad_f32 v2, v3, v4, v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, v4, v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 ; 
GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-DENORM-NEXT: ; return to shader part epilog @@ -18,25 +16,22 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float % ; GFX10-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-NEXT: v_fmac_f32_e32 v3, v0, v1 -; GFX10-NEXT: v_add_f32_e32 v0, v3, v2 +; GFX10-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v0, v1 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v3, v2 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v3, v0, v1 -; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v3, v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul half %u, %v @@ -50,12 +45,8 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float % define amdgpu_vs float @test_f16_f32_add_ext_fma_mul(half %x, half %y, float %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_ext_fma_mul: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v4 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: 
v_mad_f32 v0, v0, v3, v2 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v5, v1 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, v4, v2 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_f16_f32_add_ext_fma_mul: @@ -94,34 +85,29 @@ define amdgpu_vs float @test_f16_f32_add_ext_fma_mul(half %x, half %y, float %z, define amdgpu_vs float @test_f16_f32_add_fma_ext_mul_rhs(float %x, float %y, float %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v3, v4 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v1, v2 ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-NEXT: v_fmac_f32_e32 v3, v1, v2 -; GFX10-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX10-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v1, v2 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v3, v1, v2 -; 
GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul half %u, %v @@ -135,12 +121,8 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul_rhs(float %x, float %y, flo define amdgpu_vs float @test_f16_f32_add_ext_fma_mul_rhs(float %x, half %y, half %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_ext_fma_mul_rhs: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v3, v4 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v1, v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_f16_f32_add_ext_fma_mul_rhs: @@ -181,72 +163,56 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_fma_ext_mul(<4 x float> %x, < ; GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v14, v0, v4 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v12, v1, v5 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v15, v2, v6 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v13, v3, v7 -; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v14, v8 -; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v15, v10 -; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, 
v0, v4, v12 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v11 ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_v4f16_v4f32_add_fma_ext_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_fmac_f32_e32 v14, v0, v4 -; GFX10-NEXT: v_fmac_f32_e32 v12, v1, v5 -; GFX10-NEXT: v_fmac_f32_e32 v15, v2, v6 -; GFX10-NEXT: v_fmac_f32_e32 v13, v3, v7 -; GFX10-NEXT: v_add_f32_e32 v0, v14, v8 -; GFX10-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX10-NEXT: v_add_f32_e32 v2, v15, v10 -; GFX10-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX10-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX10-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX10-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX10-NEXT: v_add_f32_e32 v3, v3, v11 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_v4f16_v4f32_add_fma_ext_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-CONTRACT-NEXT: 
v_cvt_f32_f16_e32 v14, v12 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v14, v0, v4 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v12, v1, v5 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v15, v2, v6 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v13, v3, v7 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v14, v8 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v15, v10 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v11 ; GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_v4f32_add_fma_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v14, v0, v4 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v12, v1, v5 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v15, v2, v6 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v13, v3, v7 -; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v14, v8 -; 
GFX10-DENORM-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v15, v10 -; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v11 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul <4 x half> %u, %v @@ -339,72 +305,56 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_fma_ext_mul_rhs(<4 x float> % ; GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v14, v4, v8 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v12, v5, v9 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v15, v6, v10 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v13, v7, v11 -; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v4 +; 
GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_fmac_f32_e32 v14, v4, v8 -; GFX10-NEXT: v_fmac_f32_e32 v12, v5, v9 -; GFX10-NEXT: v_fmac_f32_e32 v15, v6, v10 -; GFX10-NEXT: v_fmac_f32_e32 v13, v7, v11 -; GFX10-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX10-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX10-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX10-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX10-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX10-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX10-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX10-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 
v14, v4, v8 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v12, v5, v9 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v15, v6, v10 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v13, v7, v11 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v14, v4, v8 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v12, v5, v9 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v15, v6, v10 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v13, v7, v11 -; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: 
v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul <4 x half> %u, %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll index 60e471d30413a..f3e5615783639 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll @@ -8,16 +8,15 @@ define amdgpu_vs float @test_f16_f32_add_ext_mul(half inreg %x, half inreg %y, float inreg %z) { ; GFX9-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul: ; GFX9-FAST-DENORM: ; %bb.0: ; %.entry -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX9-FAST-DENORM-NEXT: v_mad_f32 v0, v0, v1, s2 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-FAST-DENORM-NEXT: v_mad_mix_f32 v0, s0, v0, v1 op_sel_hi:[1,1,0] ; GFX9-FAST-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v1, s2 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s1, v0 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast half %x, %y @@ -29,16 +28,15 @@ define amdgpu_vs float @test_f16_f32_add_ext_mul(half inreg %x, half inreg %y, f define amdgpu_vs float @test_f16_f32_add_ext_mul_rhs(half inreg %x, half inreg %y, float inreg %z) { ; 
GFX9-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul_rhs: ; GFX9-FAST-DENORM: ; %bb.0: ; %.entry -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX9-FAST-DENORM-NEXT: v_mad_f32 v0, v0, v1, s2 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-FAST-DENORM-NEXT: v_mad_mix_f32 v0, s0, v0, v1 op_sel_hi:[1,1,0] ; GFX9-FAST-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul_rhs: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v1, s2 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s1, v0 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast half %x, %y @@ -70,25 +68,16 @@ define amdgpu_vs <5 x float> @test_5xf16_5xf32_add_ext_mul(<5 x half> inreg %x, ; ; GFX10-FAST-DENORM-LABEL: test_5xf16_5xf32_add_ext_mul: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s11, s0, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s1, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s3, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s4, 16 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s11 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v2, s1 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v3, s12 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v4, s2 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v5, s3 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v6, s13 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v7, s4 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v8, s14 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v9, s5 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v5, s6 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v1, v1, v6, s7 -; GFX10-FAST-DENORM-NEXT: 
v_fma_f32 v2, v2, v7, s8 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v3, v3, v8, s9 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v4, v4, v9, s10 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s8 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s0, s3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s1, s4, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast <5 x half> %x, %y @@ -122,30 +111,18 @@ define amdgpu_vs <6 x float> @test_6xf16_6xf32_add_ext_mul_rhs(<6 x half> inreg ; ; GFX10-FAST-DENORM-LABEL: test_6xf16_6xf32_add_ext_mul_rhs: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s0, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s1, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s2, 16 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v2, s1 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v4, s2 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s0, s3, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s1, s4, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s2, s5, 16 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s12 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v3, s13 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v5, s14 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v6, s3 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v7, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v8, s4 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v9, s1 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v10, s5 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v11, s2 
-; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v6, s6 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v1, v1, v7, s7 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v2, v2, v8, s8 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v3, v3, v9, s9 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v4, v4, v10, s10 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v5, v5, v11, s11 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s8 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v5, s11 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s0, s3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s1, s4, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v5, s2, s5, v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast <6 x half> %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll index d846ca98f9419..d225626ff62bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll @@ -6,16 +6,12 @@ define amdgpu_vs float @test_f16_to_f32_sub_ext_mul(half %x, half %y, float %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: 
test_f16_to_f32_sub_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %x, %y @@ -28,16 +24,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_ext_mul_rhs(float %x, half %y, half %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast half %y, %z @@ -64,18 +56,12 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul(<4 x half> %x, <4 ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; 
GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4 -; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5 -; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6 -; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v0, v2, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v3, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v4 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %x, %y @@ -102,18 +88,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul_rhs(<4 x float> %x ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0 -; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v6, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v7, v2 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; 
return to shader part epilog .entry: %a = fmul fast <4 x half> %y, %z diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll index 84002c0e3f22b..920f099164349 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll @@ -6,16 +6,12 @@ define amdgpu_vs float @test_f16_to_f32_sub_ext_neg_mul(half %x, half %y, float %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %x, %y @@ -29,16 +25,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_neg_ext_mul(half %x, half %y, float %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, -v1, -v2 
op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %x, %y @@ -53,16 +45,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_ext_neg_mul2(float %x, half %y, half %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul2: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %y, %z @@ -76,16 +64,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_neg_ext_mul2(float %x, half %y, half %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul2: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %y, %z @@ -113,20 +97,13 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul(<4 x half> %x, ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul: ; GFX10-DENORM: ; 
%bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4 -; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5 -; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6 -; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %x, %y @@ -154,20 +131,13 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul(<4 x half> %x, ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1 -; GFX10-DENORM-NEXT: 
v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4 -; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5 -; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6 -; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %x, %y @@ -196,20 +166,12 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul2(<4 x float> % ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0 -; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %y, %z @@ -237,20 +199,12 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul2(<4 x float> % ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0 -; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, 
v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %y, %z diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll index 0917022f84080..f46aa7736108c 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,SDAG-GFX9 %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s + +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GISEL-GFX9 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 { ; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: @@ -10,23 +14,45 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %s ; GFX9-NEXT: 
v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -45,25 +71,51 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %s ; GFX9-NEXT: v_mov_b32_e32 v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: v_mov_b32_e32 v0, 1.0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: 
v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: v_mov_b32_e32 v0, 1.0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: s_movk_i32 s4, 0x3c00 +; GISEL-VI-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_e32 v0, s4, v0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3c00 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -81,25 +133,49 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src ; GFX9-NEXT: v_mov_b32_e32 v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, 
v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: v_mov_b32_e32 v0, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -111,30 +187,62 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src } define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 { -; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; SDAG-GFX9: ; %bb.0: +; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; GISEL-GFX9: ; %bb.0: +; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; 
GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -147,30 +255,62 @@ define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, ha } define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 { -; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; SDAG-GFX9: ; %bb.0: +; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; GISEL-GFX9: ; %bb.0: +; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 
+; GISEL-CI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -190,23 +330,45 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half % ; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: 
v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -225,23 +387,53 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; 
SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; 
GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -263,30 +455,67 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; VI-NEXT: flat_store_short v[0:1], v0 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: s_mov_b32 s7, 0xf000 -; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-VI-NEXT: flat_store_short v[0:1], v0 +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) +; SDAG-VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: s_mov_b32 s7, 0xf000 +; SDAG-CI-NEXT: s_mov_b32 s6, -1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp +; SDAG-CI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: flat_store_short v[0:1], v0 +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) +; GISEL-VI-NEXT: v_max_f16_e64 v0, v0, v0 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: s_mov_b32 s6, -1 +; GISEL-CI-NEXT: s_mov_b32 s7, 0xf000 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) +; GISEL-CI-NEXT: v_max_f32_e32 v1, v2, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; 
GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -308,3 +537,6 @@ declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) # attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { nounwind readnone speculatable } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CI: {{.*}} +; VI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll index 451cc98f7ada9..004f9abdee8dc 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -1,22 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX906 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX900 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s + +; RUN: llc -global-isel -march=amdgcn 
-mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s define half @mixlo_simple(float %src0, float %src1, float %src2) #0 { -; GFX906-LABEL: mixlo_simple: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: mixlo_simple: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: mixlo_simple: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: mixlo_simple: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -24,31 +29,38 @@ define half @mixlo_simple(float %src0, float %src1, float %src2) #0 { ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: mixlo_simple: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: mixlo_simple: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: mixlo_simple: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2) %cvt.result = fptrunc float %result to half ret half %cvt.result } define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { -; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -59,13 +71,23 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -75,18 +97,18 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src } define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 { -; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -96,13 +118,22 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; 
SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -111,18 +142,18 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2 } define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 { -; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -132,13 +163,30 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr ; VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -149,13 +197,6 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr } define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 { -; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
@@ -163,6 +204,13 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp +; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -172,13 +220,22 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -192,6 +249,14 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src ; 
operation only clobbers relevant lane. define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { +; GFX900-LABEL: v_mad_mix_v2f32: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; ; GFX906-LABEL: v_mad_mix_v2f32: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -200,52 +265,77 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half ; GFX906-NEXT: v_mov_b32_e32 v0, v3 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v1, v2 -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v2f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, 
v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v2f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 
to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -255,76 +345,138 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half } define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v3f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v3f32: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v3f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v3f32: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: 
v_mad_mix_v3f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mac_f32_e32 v8, v6, v7 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_cvt_f16_f32_e32 v2, v4 -; VI-NEXT: v_cvt_f16_f32_e32 v1, v5 -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v3f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v3f32: +; 
SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v3f32: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v3f32: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; 
GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v3f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_mac_f32_e32 v7, v1, v4 -; CI-NEXT: v_mac_f32_e32 v6, v0, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v6 -; CI-NEXT: v_mac_f32_e32 v8, v2, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v3f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; 
GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v8 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v3f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v8 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <3 x half> %src0 to <3 x float> %src1.ext = fpext <3 x half> %src1 to <3 x float> %src2.ext = fpext <3 x half> %src2 to <3 x float> @@ -334,95 +486,172 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half } define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v4f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 
op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v0, v7 -; GFX906-NEXT: v_mov_b32_e32 v1, v6 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v4f32: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v7 +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v6 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v4f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v7 -; GFX900-NEXT: v_mov_b32_e32 v1, v6 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v4f32: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v7 +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v6 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v4f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_mac_f32_e32 v10, v6, v8 -; VI-NEXT: v_mac_f32_e32 v11, v7, v9 -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_cvt_f16_f32_sdwa v1, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v2, v5 -; VI-NEXT: v_cvt_f16_f32_e32 v3, v4 -; VI-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-NEXT: v_or_b32_e32 v0, v3, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v4f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; 
SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9 +; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8 +; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v3, v5 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v4f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, 
v5 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v4f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_mac_f32_e32 v10, v2, v6 -; CI-NEXT: v_mac_f32_e32 v9, v1, v5 -; CI-NEXT: v_mac_f32_e32 v8, v0, v4 -; CI-NEXT: v_mac_f32_e32 v11, v3, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX900-LABEL: v_mad_mix_v4f32: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v7 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v4f32: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v7 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v4f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8 +; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v4 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v10 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v5 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v11 +; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v4f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v10 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v11 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <4 x half> %src0 to <4 x float> %src1.ext = fpext <4 x half> 
%src1 to <4 x float> %src2.ext = fpext <4 x half> %src2 to <4 x float> @@ -434,6 +663,14 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half ; FIXME (DAG): Fold clamp define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { +; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; ; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -442,52 +679,91 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s ; GFX906-NEXT: v_mov_b32_e32 v0, v3 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v5 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: 
v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -499,82 +775,167 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s } ; FIXME (DAG): Should be packed into 2 registers per argument? 
+; FIXME (GIsel): V_PK_MAX clamp could be folded into mixlo define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_pack_b32_f16 v1, v1, 0 -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0 +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_pack_b32_f16 v1, v1, 0 -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, 
v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0 +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mac_f32_e32 v8, v6, v7 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp -; VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; SDAG-VI-NEXT: 
v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, 
v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v8, v2, v5 -; CI-NEXT: v_mac_f32_e32 v6, v0, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v8 -; CI-NEXT: v_mac_f32_e32 v7, v1, v4 -; CI-NEXT: v_cvt_f32_f16_e64 v2, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v7 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v6 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v4 clamp +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v8 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v5 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v2, v3, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v3 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v3 +; GISEL-CI-NEXT: v_min_f32_e32 v2, v2, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <3 x half> %src0 to <3 x float> %src1.ext = fpext <3 x half> %src1 to <3 x float> %src2.ext = fpext <3 x half> %src2 to <3 x float> @@ -586,6 +947,17 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s } define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 { +; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mov_b32_e32 v0, v6 +; GFX900-NEXT: v_mov_b32_e32 v1, v2 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; ; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -597,84 +969,154 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s ; 
GFX906-NEXT: v_mov_b32_e32 v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9 +; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v11 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v10 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp +; 
SDAG-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mac_f32_e32 v10, v6, v8 -; VI-NEXT: v_mac_f32_e32 v11, v7, v9 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v10 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_sdwa v1, v11 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp -; VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: v_or_b32_e32 v1, v3, v1 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, 
v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, v0, v4 +; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, v5 +; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v3, v3 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: 
v_cvt_f32_f16_e32 v9, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8 +; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v4 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v10 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v11 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: 
v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v11, v3, v7 -; CI-NEXT: v_mac_f32_e32 v8, v0, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v10, v2, v6 -; CI-NEXT: v_cvt_f32_f16_e64 v3, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v10 -; CI-NEXT: v_mac_f32_e32 v9, v1, v5 -; CI-NEXT: v_cvt_f32_f16_e64 v2, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v9 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v8 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v10 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v4, v11 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: 
v_max_f32_e32 v3, v3, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v2, v4, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v2 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v5 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v5 +; GISEL-CI-NEXT: v_min_f32_e32 v2, v3, v5 +; GISEL-CI-NEXT: v_min_f32_e32 v3, v4, v5 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <4 x half> %src0 to <4 x float> %src1.ext = fpext <4 x half> %src1 to <4 x float> %src2.ext = fpext <4 x half> %src2 to <4 x float> @@ -685,61 +1127,140 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s ret <4 x half> %clamp } +; FIXME (GISel): Packed Vectors handling isn't great for now, so we don't end up with +; a build_vector to select the mixhi. Issue is more specifically with how insert_vector_elt is being +; legalized (bitwise ops instead of shuffle/build_vector for instance). 
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: 
v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_max_f16_e64 v0, v3, v3 clamp +; GISEL-GFX900-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff0000 +; GISEL-GFX900-NEXT: v_and_or_b32 v0, v4, v1, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v4, v3 +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_max_f16_e64 v0, v3, v3 clamp +; GISEL-GFX906-NEXT: v_bfe_u32 v0, 
v0, 0, 16 +; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff0000 +; GISEL-GFX906-NEXT: v_and_or_b32 v0, v4, v1, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_max_f16_e64 v1, v0, v0 clamp +; GISEL-VI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -753,60 +1274,139 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> } define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; 
GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v1, v2 -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, 16 +; GISEL-GFX900-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff +; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v1, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, 
v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, 16 +; GISEL-GFX906-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff +; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v1, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 
v0, v2 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_max_f16_sdwa v1, v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, 
v1 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -822,64 +1422,117 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> ; FIXME (DAG): Should be able to use mixlo/mixhi define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3 -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX906-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3 -; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX900-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp -; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-VI-NEXT: s_setpc_b64 
s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_precvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; 
GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v1, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -893,82 +1546,150 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr ; FIXME (DAG): Handling undef 4th component define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v2, v3 -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX906-NEXT: v_pack_b32_f16 v0, v0, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: 
v_mad_mix_v3f32_clamp_precvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v3 -; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX900-NEXT: v_pack_b32_f16 v0, v0, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v3f32_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp -; VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp -; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; 
VI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; VI-NEXT: v_or_b32_e32 v0, v0, v2 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp +; SDAG-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v3f32_clamp_precvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp -; CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: 
v_cvt_f16_f32_e32 v2, v6 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v2, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v2, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; GISEL-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v6 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp +; GISEL-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <3 x half> %src0 to <3 x float> %src1.ext = fpext <3 x half> %src1 to <3 x float> %src2.ext = fpext <3 x half> %src2 to <3 x float> @@ -980,103 +1701,188 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr } define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 -; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX906-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 
-; GFX906-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX906-NEXT: v_pack_b32_f16 v0, v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v3 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6 -; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX900-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX900-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX900-NEXT: v_pack_b32_f16 v0, v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] 
op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v3 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v4f32_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mad_f32 v6, v6, v8, v10 clamp -; VI-NEXT: v_mad_f32 v7, v7, v9, v11 clamp -; VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp -; VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_sdwa v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-NEXT: v_or_b32_e32 v1, v1, v2 -; VI-NEXT: v_or_b32_e32 v0, v0, v3 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_mad_f32 v7, v7, v9, v10 clamp +; SDAG-VI-NEXT: v_mad_f32 v6, v6, v8, v11 clamp +; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v3 +; SDAG-VI-NEXT: v_or_b32_e32 v1, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 
v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp +; SDAG-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp +; SDAG-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v6 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v3, v0 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v1, v2, v1 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v6 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v3, v0 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v1, v2, v1 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v6, v6, v8, v10 clamp +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; GISEL-VI-NEXT: v_mad_f32 v2, v7, v9, v11 clamp +; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v6 +; GISEL-VI-NEXT: 
v_cvt_f16_f32_e32 v2, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v4f32_clamp_precvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp -; CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp -; CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp -; CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; 
CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp +; GISEL-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp +; GISEL-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <4 x half> %src0 to <4 x float> %src1.ext = fpext <4 x half> %src1 to <4 x float> %src2.ext = fpext <4 x half> %src2 to <4 x float> @@ -1114,3 +1920,5 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) # attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { nounwind readnone speculatable } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index 3a6c1f1850ad0..b3b8807fea05f 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -1,8 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900 %s -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CIVI,VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CIVI,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s + +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: @@ -26,11 +31,20 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; 
VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -109,11 +123,20 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> % ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v1, v3, v5 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.hi = extractelement <2 x half> %src0, i32 1 %src1.hi = extractelement <2 x half> %src1, i32 1 %src2.hi = extractelement <2 x half> %src2, i32 1 @@ -125,54 +148,96 @@ define float 
@v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> % } define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX900-LABEL: v_mad_mix_v2f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v1, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v6, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v2 -; VI-NEXT: v_mac_f32_e32 v1, v3, v5 -; VI-NEXT: v_mac_f32_e32 v0, v4, v6 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v6 -; CI-NEXT: v_mac_f32_e32 v3, v1, v5 -; 
CI-NEXT: v_mov_b32_e32 v1, v3 -; CI-NEXT: v_mac_f32_e32 v0, v4, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: 
v_cvt_f32_f16_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6 +; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5 +; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5 +; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 
v0, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -210,22 +275,42 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, ; VI-NEXT: v_mov_b32_e32 v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_shuffle: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v0 -; CI-NEXT: v_mad_f32 v0, v4, v2, v1 -; CI-NEXT: v_mac_f32_e32 v1, v5, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v0 +; SDAG-CI-NEXT: v_mad_f32 v0, v4, v2, v1 +; SDAG-CI-NEXT: v_mac_f32_e32 v1, v5, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, 
v0 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5 +; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3 +; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1 +; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> @@ -249,20 +334,38 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, 
v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -293,11 +396,20 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %s ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -328,11 +440,20 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, 
half ; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -363,11 +484,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -395,11 
+524,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr ; VI-NEXT: v_mad_f32 v0, v0, v1, -v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, -v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.neg = fneg float %src2 @@ -428,11 +565,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr ; VI-NEXT: v_mad_f32 v0, v0, v1, |v2| ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, |v2| -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, |v2| +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, |v2| +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.abs = call float @llvm.fabs.f32(float %src2) @@ -461,11 +606,19 @@ 
define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float ; VI-NEXT: v_mad_f32 v0, v0, v1, -|v2| ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.abs = call float @llvm.fabs.f32(float %src2) @@ -479,19 +632,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float ; inline immediate. 
define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 1.0 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 1.0 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: ; VI: ; %bb.0: @@ -501,11 +654,33 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { ; VI-NEXT: v_mad_f32 v0, v0, v1, 1.0 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: 
v_mov_b32_e32 v2, 1.0 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 1.0 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0) @@ -513,19 +688,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { } define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0.15915494 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0.15915494 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: 
v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: ; VI: ; %bb.0: @@ -535,11 +710,34 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 ; VI-NEXT: v_mad_f32 v0, v0, v1, 0.15915494 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0.15915494 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0.15915494 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e22f983 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = 
fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000) @@ -553,33 +751,65 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 ; f32 1/2pi = 0x3e22f983 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 
s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x3e230000 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x3e230000 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x3e230000 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e230000 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2 
= fpext half 0xH3118 to float @@ -589,33 +819,65 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0x367c0000 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0x367c0000 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x367c0000 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x367c0000 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x367c0000 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x367c0000 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x367c0000 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x367c0000 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2 = fpext half 0xH003F to float @@ -624,49 +886,89 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { } define <2 x float> 
@v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 { -; GFX900-LABEL: v_mad_mix_v2f32_f32imm1: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 1.0 -; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32_f32imm1: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 1.0 -; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: v_mov_b32_e32 v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32_f32imm1: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_mad_f32 v0, v0, v3, 1.0 -; VI-NEXT: v_mad_f32 v1, v2, v1, 1.0 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32_f32imm1: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 -; CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, 
v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 1.0 +; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 1.0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: s_mov_b32 s4, 1.0 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, 
v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: s_mov_b32 s4, 1.0 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 1.0 +; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 1.0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> ) @@ -674,51 +976,93 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) } define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { -; GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; GFX900: ; %bb.0: -; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: v_mov_b32_e32 v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_mov_b32_e32 v1, 0x3e230000 -; VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000 -; VI-NEXT: v_mac_f32_e32 v1, v2, v4 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v1 -; CI-NEXT: v_mov_b32_e32 v1, 0x3e230000 -; CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000 -; CI-NEXT: v_mac_f32_e32 v1, v4, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 +; 
SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000 +; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000 +; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000 +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000 +; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, s4 +; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, s4 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4 +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2 = fpext <2 x half> to <2 x float> @@ -727,50 +1071,91 @@ define <2 x float> 
@v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> } define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { -; GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0.15915494 -; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0.15915494 -; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: v_mov_b32_e32 v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494 -; VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v1 -; CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983 -; CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983 -; CI-NEXT: v_mac_f32_e32 v1, v4, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: 
v_mad_mix_v2f32_f32imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494 +; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983 +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983 +; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3 +; SDAG-CI-NEXT: s_setpc_b64 
s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: s_mov_b32 s4, 0.15915494 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: s_mov_b32 s4, 0.15915494 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494 +; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e22f983 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4 +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2 = 
fpext <2 x half> to <2 x float> @@ -800,11 +1185,20 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.hi = extractelement <2 x half> %src0, i32 1 %src1.hi = extractelement <2 x half> %src1, i32 1 %src2.hi = extractelement <2 x half> %src2, i32 1 @@ -830,11 +1224,17 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 { ; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CIVI-LABEL: no_mix_simple: -; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CIVI-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: no_mix_simple: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mad_f32 v0, v0, v1, v2 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: no_mix_simple: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; CI-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2) ret float %result } @@ -852,11 +1252,17 @@ define float @no_mix_simple_fabs(float %src0, 
float %src1, float %src2) #0 { ; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CIVI-LABEL: no_mix_simple_fabs: -; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2 -; CIVI-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: no_mix_simple_fabs: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: no_mix_simple_fabs: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; CI-NEXT: s_setpc_b64 s[30:31] %src0.fabs = call float @llvm.fabs.f32(float %src0) %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2) ret float %result @@ -892,11 +1298,20 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr ; VI-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_fma_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -928,11 +1343,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, fl ; VI-NEXT: v_add_f32_e32 v0, 
v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_fma_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -970,12 +1393,22 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, ; VI-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mul_f32_e32 v0, v0, v1 -; CI-NEXT: v_add_f32_e32 v0, v0, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; 
GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -1012,12 +1445,21 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half ; VI-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mul_f32_e32 v0, v0, v1 -; CI-NEXT: v_add_f32_e32 v0, v0, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %mul = fmul float %src0.ext, %src1.ext @@ -1047,11 +1489,20 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, hal ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: +; 
GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -1081,11 +1532,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %mul = fmul contract float %src0.ext, %src1.ext @@ -1106,21 +1565,39 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: 
v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %src0 = extractelement <2 x half> %src0.arg.bc, i32 0 %src0.neg = fneg half %src0 @@ -1160,13 +1637,23 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half % ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: 
v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 %src0.neg = fneg half %src0 @@ -1200,13 +1687,23 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, |v0| +; 
GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 %src0.abs = call half @llvm.fabs.f16(half %src0) @@ -1230,22 +1727,43 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; 
GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %fneg = fneg <2 x half> %src0.arg.bc %src0 = extractelement <2 x half> %fneg, i32 1 @@ -1269,22 +1787,43 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v2 -; VI-NEXT: v_mac_f32_e32 v0, v3, v1 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: 
v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) %src0 = extractelement <2 x half> %fabs, i32 1 @@ -1308,22 +1847,43 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; 
GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) %fneg.fabs = fneg <2 x half> %fabs From 9aae3dd94c881a36998ee8027d3e126ff3becf89 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Tue, 8 Nov 2022 09:46:39 +0200 Subject: [PATCH 507/516] [mlir][llvm] Update insertion point handling in LLVM import. Insert constants and globals in order by maintaining the position of the constant and global inserted last. Update the tests to reflect the updated insertion order. Also make sure functions are always inserted at the end of the module instead of at the second last position and delete a spurious function in intrinsic.ll that seems to exist only to prevent the first function under test from ending up at the end of the module. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D136679 --- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 139 +++++++++++------- mlir/test/Target/LLVMIR/Import/basic.ll | 30 ++-- .../LLVMIR/Import/constant-aggregate.ll | 47 +++--- .../Import/incorrect-constant-caching.ll | 15 +- .../incorrect-constexpr-inst-caching.ll | 23 ++- .../test/Target/LLVMIR/Import/instructions.ll | 16 +- mlir/test/Target/LLVMIR/Import/intrinsic.ll | 24 ++- .../Target/LLVMIR/Import/zeroinitializer.ll | 8 +- 8 files changed, 160 insertions(+), 142 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 865add93f9659..18cff0c466771 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -412,35 +412,35 @@ class Importer { /// Returns the builtin type equivalent to be used in attributes for the given /// LLVM IR dialect type.
Type getStdTypeForAttr(Type type); - /// Return `value` as an attribute to attach to a GlobalOp. + /// Returns `value` as an attribute to attach to a GlobalOp. Attribute getConstantAsAttr(llvm::Constant *value); - /// Return `constant` as an MLIR Value. This could either be a ConstantOp, or - /// an expanded sequence of ops in the current function's entry block (for + /// Converts the LLVM constant to an MLIR value produced by a ConstantOp, + /// AddressOfOp, NullOp, or to an expanded sequence of operations (for /// ConstantExprs or ConstantGEPs). - Value processConstant(llvm::Constant *constant); + Value convertConstantInPlace(llvm::Constant *constant); + /// Converts the LLVM constant to an MLIR value using the + /// `convertConstantInPlace` method and inserts the constant at the start of + /// the function entry block. + Value convertConstant(llvm::Constant *constant); + + /// Set the constant insertion point to the start of the given block. + void setConstantInsertionPointToStart(Block *block) { + constantInsertionBlock = block; + constantInsertionOp = nullptr; + } - /// Builder pointing at where the next Instruction should be generated. + /// Builder pointing at where the next instruction should be generated. OpBuilder builder; + /// Block to insert the next constant into. + Block *constantInsertionBlock = nullptr; + /// Operation to insert the next constant after. + Operation *constantInsertionOp = nullptr; + /// Operation to insert the next global after. + Operation *globalInsertionOp = nullptr; /// The current context. MLIRContext *context; /// The current module being created. ModuleOp module; - /// The entry block of the current function being processed. - Block *currentEntryBlock = nullptr; - - /// Globals are inserted before the first function, if any. 
- Block::iterator getGlobalInsertPt() { - Block::iterator it = module.getBody()->begin(); - Block::iterator endIt = module.getBody()->end(); - while (it != endIt && !isa(it)) - ++it; - return it; - } - - /// Functions are always inserted before the module terminator. - Block::iterator getFuncInsertPt() { - return std::prev(module.getBody()->end()); - } /// Function-local mapping between original and imported block. DenseMap blockMapping; @@ -642,7 +642,14 @@ GlobalOp Importer::processGlobal(llvm::GlobalVariable *gv) { if (it != globals.end()) return it->second; - OpBuilder b(module.getBody(), getGlobalInsertPt()); + // Insert the global after the last one or at the start of the module. + OpBuilder::InsertionGuard guard(builder); + if (!globalInsertionOp) { + builder.setInsertionPointToStart(module.getBody()); + } else { + builder.setInsertionPointAfter(globalInsertionOp); + } + Attribute valueAttr; if (gv->hasInitializer()) valueAttr = getConstantAsAttr(gv->getInitializer()); @@ -655,20 +662,18 @@ GlobalOp Importer::processGlobal(llvm::GlobalVariable *gv) { alignment = align.value(); } - GlobalOp op = b.create( + GlobalOp op = builder.create( UnknownLoc::get(context), type, gv->isConstant(), convertLinkageFromLLVM(gv->getLinkage()), gv->getName(), valueAttr, alignment, /*addr_space=*/gv->getAddressSpace(), /*dso_local=*/gv->isDSOLocal(), /*thread_local=*/gv->isThreadLocal()); + globalInsertionOp = op; if (gv->hasInitializer() && !valueAttr) { - Region &r = op.getInitializerRegion(); - currentEntryBlock = b.createBlock(&r); - b.setInsertionPoint(currentEntryBlock, currentEntryBlock->begin()); - Value v = processConstant(gv->getInitializer()); - if (!v) - return nullptr; - b.create(op.getLoc(), ArrayRef({v})); + Block *block = builder.createBlock(&op.getInitializerRegion()); + setConstantInsertionPointToStart(block); + Value value = convertConstant(gv->getInitializer()); + builder.create(op.getLoc(), ArrayRef({value})); } if (gv->hasAtLeastLocalUnnamedAddr()) 
op.setUnnamedAddr(convertUnnamedAddrFromLLVM(gv->getUnnamedAddr())); @@ -678,29 +683,25 @@ GlobalOp Importer::processGlobal(llvm::GlobalVariable *gv) { return globals[gv] = op; } -Value Importer::processConstant(llvm::Constant *constant) { - OpBuilder bEntry(currentEntryBlock, currentEntryBlock->begin()); +Value Importer::convertConstantInPlace(llvm::Constant *constant) { if (Attribute attr = getConstantAsAttr(constant)) { // These constants can be represented as attributes. - OpBuilder b(currentEntryBlock, currentEntryBlock->begin()); Type type = convertType(constant->getType()); if (auto symbolRef = attr.dyn_cast()) - return bEntry.create(UnknownLoc::get(context), type, - symbolRef.getValue()); - return bEntry.create(UnknownLoc::get(context), type, attr); + return builder.create(UnknownLoc::get(context), type, + symbolRef.getValue()); + return builder.create(UnknownLoc::get(context), type, attr); } if (auto *cn = dyn_cast(constant)) { Type type = convertType(cn->getType()); - return bEntry.create(UnknownLoc::get(context), type); + return builder.create(UnknownLoc::get(context), type); } if (auto *gv = dyn_cast(constant)) - return bEntry.create(UnknownLoc::get(context), - processGlobal(gv)); + return builder.create(UnknownLoc::get(context), + processGlobal(gv)); if (auto *ce = dyn_cast(constant)) { llvm::Instruction *i = ce->getAsInstruction(); - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPoint(currentEntryBlock, currentEntryBlock->begin()); if (failed(processInstruction(i))) return nullptr; assert(valueMapping.count(i)); @@ -720,7 +721,7 @@ Value Importer::processConstant(llvm::Constant *constant) { } if (auto *ue = dyn_cast(constant)) { Type type = convertType(ue->getType()); - return bEntry.create(UnknownLoc::get(context), type); + return builder.create(UnknownLoc::get(context), type); } if (isa(constant) || @@ -747,41 +748,62 @@ Value Importer::processConstant(llvm::Constant *constant) { bool useInsertValue = rootType.isa(); 
assert((useInsertValue || LLVM::isCompatibleVectorType(rootType)) && "unrecognized aggregate type"); - Value root = bEntry.create(UnknownLoc::get(context), rootType); + Value root = builder.create(UnknownLoc::get(context), rootType); for (unsigned i = 0; i < numElements; ++i) { llvm::Constant *element = getElement(i); - Value elementValue = processConstant(element); + Value elementValue = convertConstantInPlace(element); if (!elementValue) return nullptr; if (useInsertValue) { - root = bEntry.create(UnknownLoc::get(context), root, - elementValue, i); + root = builder.create(UnknownLoc::get(context), root, + elementValue, i); } else { - Attribute indexAttr = bEntry.getI32IntegerAttr(static_cast(i)); - Value indexValue = bEntry.create( - UnknownLoc::get(context), bEntry.getI32Type(), indexAttr); + Attribute indexAttr = + builder.getI32IntegerAttr(static_cast(i)); + Value indexValue = builder.create( + UnknownLoc::get(context), builder.getI32Type(), indexAttr); if (!indexValue) return nullptr; - root = bEntry.create( + root = builder.create( UnknownLoc::get(context), rootType, root, elementValue, indexValue); } } return root; } - emitError(UnknownLoc::get(context)) - << "unhandled constant: " << diag(*constant); return nullptr; } +Value Importer::convertConstant(llvm::Constant *constant) { + assert(constantInsertionBlock && + "expected the constant insertion block to be non-null"); + + // Insert the constant after the last one or at the start of the entry block. + OpBuilder::InsertionGuard guard(builder); + if (!constantInsertionOp) { + builder.setInsertionPointToStart(constantInsertionBlock); + } else { + builder.setInsertionPointAfter(constantInsertionOp); + } + + // Convert the constant in-place and update the insertion point if successful.
+ if (Value result = convertConstantInPlace(constant)) { + constantInsertionOp = result.getDefiningOp(); + return result; + } + + llvm::errs() << diag(*constant) << "\n"; + llvm_unreachable("unhandled constant"); +} + Value Importer::processValue(llvm::Value *value) { auto it = valueMapping.find(value); if (it != valueMapping.end()) return it->second; - // Process constants such as immediate arguments that have no mapping. + // Convert constants such as immediate arguments that have no mapping. if (auto *c = dyn_cast(value)) - return processConstant(c); + return convertConstant(c); llvm::errs() << diag(*value) << "\n"; llvm_unreachable("unhandled value"); @@ -927,7 +949,7 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) { SmallVector ops; for (unsigned i = 0, ie = lpi->getNumClauses(); i < ie; i++) - ops.push_back(processConstant(lpi->getClause(i))); + ops.push_back(convertConstant(lpi->getClause(i))); Type ty = convertType(lpi->getType()); Value res = builder.create(loc, ty, lpi->isCleanup(), ops); @@ -1034,7 +1056,10 @@ LogicalResult Importer::processFunction(llvm::Function *func) { bool dsoLocal = func->hasLocalLinkage(); CConv cconv = convertCConvFromLLVM(func->getCallingConv()); - builder.setInsertionPoint(module.getBody(), getFuncInsertPt()); + // Insert the function at the end of the module. + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPoint(module.getBody(), module.getBody()->end()); + LLVMFuncOp funcOp = builder.create( UnknownLoc::get(context), func->getName(), functionType, convertLinkageFromLLVM(func->getLinkage()), dsoLocal, cconv); @@ -1090,7 +1115,6 @@ LogicalResult Importer::processFunction(llvm::Function *func) { builder.createBlock(&funcOp.getBody(), funcOp.getBody().end()); mapBlock(&bb, block); } - currentEntryBlock = &funcOp.getFunctionBody().getBlocks().front(); // Add function arguments to the entry block. 
for (const auto &it : llvm::enumerate(func->args())) { @@ -1103,6 +1127,7 @@ LogicalResult Importer::processFunction(llvm::Function *func) { // operands defined in a dominating block have a valid mapping to an MLIR // value once a block is translated. SetVector blocks = getTopologicallySortedBlocks(func); + setConstantInsertionPointToStart(lookupBlock(blocks.front())); for (llvm::BasicBlock *bb : blocks) { if (failed(processBasicBlock(bb, lookupBlock(bb)))) return failure(); diff --git a/mlir/test/Target/LLVMIR/Import/basic.ll b/mlir/test/Target/LLVMIR/Import/basic.ll index 784491499fe2d..05d10eeaec9d8 100644 --- a/mlir/test/Target/LLVMIR/Import/basic.ll +++ b/mlir/test/Target/LLVMIR/Import/basic.ll @@ -24,8 +24,8 @@ @g4 = external global i32, align 8 ; CHECK: llvm.mlir.global internal constant @int_gep() {addr_space = 0 : i32, dso_local} : !llvm.ptr { -; CHECK-DAG: %[[addr:[0-9]+]] = llvm.mlir.addressof @g4 : !llvm.ptr -; CHECK-DAG: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 +; CHECK: %[[addr:[0-9]+]] = llvm.mlir.addressof @g4 : !llvm.ptr +; CHECK: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 ; CHECK-NEXT: %[[gepinit:[0-9]+]] = llvm.getelementptr %[[addr]][%[[c2]]] : (!llvm.ptr, i32) -> !llvm.ptr ; CHECK-NEXT: llvm.return %[[gepinit]] : !llvm.ptr ; CHECK-NEXT: } @@ -133,10 +133,10 @@ define internal spir_func void @spir_func_internal() { ; FIXME: function attributes. 
; CHECK-LABEL: llvm.func internal @f1(%arg0: i64) -> i32 attributes {dso_local} { ; CHECK-DBG: llvm.func internal @f1(%arg0: i64 loc(unknown)) -> i32 attributes {dso_local} { -; CHECK-DAG: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 -; CHECK-DAG: %[[c42:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 -; CHECK-DAG: %[[c1:[0-9]+]] = llvm.mlir.constant(true) : i1 -; CHECK-DAG: %[[c43:[0-9]+]] = llvm.mlir.constant(43 : i32) : i32 +; CHECK: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 +; CHECK: %[[c1:[0-9]+]] = llvm.mlir.constant(true) : i1 +; CHECK: %[[c43:[0-9]+]] = llvm.mlir.constant(43 : i32) : i32 +; CHECK: %[[c42:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 define internal dso_local i32 @f1(i64 %a) norecurse { entry: ; CHECK: %{{[0-9]+}} = llvm.inttoptr %arg0 : i64 to !llvm.ptr @@ -148,7 +148,7 @@ entry: ; %{{[0-9]+}} = llvm.ptrtoint %[[addrof2]] : !llvm.ptr to i64 ; %{{[0-9]+}} = llvm.getelementptr %[[addrof]][%3] : (!llvm.ptr, i32) -> !llvm.ptr %bb = ptrtoint double* @g2 to i64 - %cc = getelementptr double, double* @g2, i32 2 + %cc = getelementptr double, double* @g2, i32 3 ; CHECK: %[[b:[0-9]+]] = llvm.trunc %arg0 : i64 to i32 ; CHECK-DBG: llvm.trunc %arg0 : i64 to i32 loc(#[[UNKNOWNLOC]]) %b = trunc i64 %a to i32 @@ -195,18 +195,18 @@ define void @f6(void (i16) *%fn) { ; Testing rest of the floating point constant kinds. 
; CHECK-LABEL: llvm.func @FPConstant(%arg0: f16, %arg1: bf16, %arg2: f128, %arg3: f80) define void @FPConstant(half %a, bfloat %b, fp128 %c, x86_fp80 %d) { - ; CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(7.000000e+00 : f80) : f80 - ; CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(0.000000e+00 : f128) : f128 - ; CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(1.000000e+00 : bf16) : bf16 - ; CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(1.000000e+00 : f16) : f16 + ; CHECK: %[[C0:.+]] = llvm.mlir.constant(1.000000e+00 : f16) : f16 + ; CHECK: %[[C1:.+]] = llvm.mlir.constant(1.000000e+00 : bf16) : bf16 + ; CHECK: %[[C2:.+]] = llvm.mlir.constant(0.000000e+00 : f128) : f128 + ; CHECK: %[[C3:.+]] = llvm.mlir.constant(7.000000e+00 : f80) : f80 - ; CHECK: llvm.fadd %[[C3]], %arg0 : f16 + ; CHECK: llvm.fadd %[[C0]], %arg0 : f16 %1 = fadd half 1.0, %a - ; CHECK: llvm.fadd %[[C2]], %arg1 : bf16 + ; CHECK: llvm.fadd %[[C1]], %arg1 : bf16 %2 = fadd bfloat 1.0, %b - ; CHECK: llvm.fadd %[[C1]], %arg2 : f128 + ; CHECK: llvm.fadd %[[C2]], %arg2 : f128 %3 = fadd fp128 0xL00000000000000000000000000000000, %c - ; CHECK: llvm.fadd %[[C0]], %arg3 : f80 + ; CHECK: llvm.fadd %[[C3]], %arg3 : f80 %4 = fadd x86_fp80 0xK4001E000000000000000, %d ret void } diff --git a/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll b/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll index 5e22aadcaff0e..955cf47779664 100644 --- a/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll +++ b/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll @@ -1,41 +1,40 @@ ; RUN: mlir-translate --import-llvm %s | FileCheck %s - -; CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(7 : i32) : i32 -; CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(8 : i16) : i16 -; CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(4 : i8) : i8 -; CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(9 : i32) : i32 ; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"SimpleAggType", (i32, i8, i16, i32)> -; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C3]], %[[ROOT]][0] -; CHECK: 
%[[CHAIN1:.+]] = llvm.insertvalue %[[C2]], %[[CHAIN0]][1] -; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C1]], %[[CHAIN1]][2] -; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C0]], %[[CHAIN2]][3] +; CHECK: %[[C0:.+]] = llvm.mlir.constant(9 : i32) : i32 +; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C0]], %[[ROOT]][0] +; CHECK: %[[C1:.+]] = llvm.mlir.constant(4 : i8) : i8 +; CHECK: %[[CHAIN1:.+]] = llvm.insertvalue %[[C1]], %[[CHAIN0]][1] +; CHECK: %[[C2:.+]] = llvm.mlir.constant(8 : i16) : i16 +; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C2]], %[[CHAIN1]][2] +; CHECK: %[[C3:.+]] = llvm.mlir.constant(7 : i32) : i32 +; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C3]], %[[CHAIN2]][3] ; CHECK: llvm.return %[[CHAIN3]] %SimpleAggType = type {i32, i8, i16, i32} @simpleAgg = global %SimpleAggType {i32 9, i8 4, i16 8, i32 7} -; CHECK: %[[NP:.+]] = llvm.mlir.null : !llvm.ptr> -; CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(4 : i32) : i32 -; CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(3 : i16) : i16 -; CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(2 : i8) : i8 -; CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(1 : i32) : i32 -; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"SimpleAggType", (i32, i8, i16, i32)> -; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C3]], %[[ROOT]][0] +; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"NestedAggType", (struct<"SimpleAggType", (i32, i8, i16, i32)>, ptr>)> +; CHECK: %[[NESTED:.+]] = llvm.mlir.undef : !llvm.struct<"SimpleAggType", (i32, i8, i16, i32)> +; CHECK: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32 +; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C1]], %[[NESTED]][0] +; CHECK: %[[C2:.+]] = llvm.mlir.constant(2 : i8) : i8 ; CHECK: %[[CHAIN1:.+]] = llvm.insertvalue %[[C2]], %[[CHAIN0]][1] -; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C1]], %[[CHAIN1]][2] -; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C0]], %[[CHAIN2]][3] -; CHECK: %[[ROOT2:.+]] = llvm.mlir.undef : !llvm.struct<"NestedAggType", (struct<"SimpleAggType", (i32, i8, 
i16, i32)>, ptr>)> -; CHECK: %[[CHAIN4:.+]] = llvm.insertvalue %[[CHAIN3]], %[[ROOT2]][0] +; CHECK: %[[C3:.+]] = llvm.mlir.constant(3 : i16) : i16 +; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C3]], %[[CHAIN1]][2] +; CHECK: %[[C4:.+]] = llvm.mlir.constant(4 : i32) : i32 +; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C4]], %[[CHAIN2]][3] +; CHECK: %[[CHAIN4:.+]] = llvm.insertvalue %[[CHAIN3]], %[[ROOT]][0] +; CHECK: %[[NP:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[CHAIN5:.+]] = llvm.insertvalue %[[NP]], %[[CHAIN4]][1] ; CHECK: llvm.return %[[CHAIN5]] %NestedAggType = type {%SimpleAggType, %SimpleAggType*} @nestedAgg = global %NestedAggType { %SimpleAggType{i32 1, i8 2, i16 3, i32 4}, %SimpleAggType* null } -; CHECK: %[[C0:.+]] = llvm.mlir.null : !llvm.ptr> -; CHECK: %[[C1:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.vec<2 x ptr>> +; CHECK: %[[C0:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[P0:.+]] = llvm.mlir.constant(0 : i32) : i32 -; CHECK: %[[CHAIN0:.+]] = llvm.insertelement %[[C1]], %[[ROOT]][%[[P0]] : i32] : !llvm.vec<2 x ptr>> +; CHECK: %[[CHAIN0:.+]] = llvm.insertelement %[[C0]], %[[ROOT]][%[[P0]] : i32] : !llvm.vec<2 x ptr>> +; CHECK: %[[C1:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[P1:.+]] = llvm.mlir.constant(1 : i32) : i32 -; CHECK: %[[CHAIN1:.+]] = llvm.insertelement %[[C0]], %[[CHAIN0]][%[[P1]] : i32] : !llvm.vec<2 x ptr>> +; CHECK: %[[CHAIN1:.+]] = llvm.insertelement %[[C1]], %[[CHAIN0]][%[[P1]] : i32] : !llvm.vec<2 x ptr>> ; CHECK: llvm.return %[[CHAIN1]] : !llvm.vec<2 x ptr>> @vectorAgg = global <2 x %SimpleAggType*> <%SimpleAggType* null, %SimpleAggType* null> diff --git a/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll b/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll index a4add0ea95414..afb8cf4e4c1a9 100644 --- a/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll +++ b/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll @@ -8,23 +8,22 @@ ; only wrote 
minimum level of checks. %my_struct = type {i32, i8*} +; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> ; CHECK: llvm.mlir.addressof @str1 : !llvm.ptr> -; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> ; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(8 : i32) : i32 ; CHECK: llvm.insertvalue -; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> ; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(8 : i32) : i32 -; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> +; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.insertvalue +; CHECK: llvm.getelementptr ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue ; CHECK: llvm.return @str0 = private unnamed_addr constant [5 x i8] c"aaaa\00" @str1 = private unnamed_addr constant [5 x i8] c"bbbb\00" @g = global [2 x %my_struct] [%my_struct {i32 8, i8* getelementptr ([5 x i8], [5 x i8]* @str0, i32 0, i32 0)}, %my_struct {i32 7, i8* getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i32 0)}] - diff --git a/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll b/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll index edc8379067451..916b961c568e7 100644 --- a/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll +++ b/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll @@ -5,27 +5,26 @@ ; Thus, we only wrote minimum level of checks. 
%my_struct = type {i32, i8*} -; CHECK: llvm.mlir.constant(3 : i32) : i32 -; CHECK: llvm.mlir.constant(2 : i32) : i32 +; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> +; CHECK: llvm.mlir.constant(0 : i32) : i32 +; CHECK: llvm.mlir.constant(1 : i32) : i32 ; CHECK: llvm.mlir.addressof @str1 : !llvm.ptr> -; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.mlir.constant(2 : i32) : i32 +; CHECK: llvm.mlir.constant(3 : i32) : i32 +; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> ; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(8 : i32) : i32 ; CHECK: llvm.insertvalue -; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.constant(1 : i32) : i32 -; CHECK: llvm.mlir.constant(0 : i32) : i32 -; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> ; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(8 : i32) : i32 -; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> +; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.insertvalue +; CHECK: llvm.getelementptr ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue ; CHECK: llvm.return @str0 = private unnamed_addr constant [5 x i8] c"aaaa\00" @str1 = private unnamed_addr constant [5 x i8] c"bbbb\00" @g = global [2 x %my_struct] [%my_struct {i32 8, i8* getelementptr ([5 x i8], [5 x i8]* @str0, i32 0, i32 1)}, %my_struct {i32 7, i8* getelementptr ([5 x i8], [5 x i8]* @str1, i32 2, i32 3)}] - diff --git a/mlir/test/Target/LLVMIR/Import/instructions.ll b/mlir/test/Target/LLVMIR/Import/instructions.ll index fa3c5efa4f40d..c5322ab6f84f3 100644 --- a/mlir/test/Target/LLVMIR/Import/instructions.ll +++ b/mlir/test/Target/LLVMIR/Import/instructions.ll @@ -6,8 +6,8 @@ ; CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] ; CHECK-SAME: 
%[[ARG4:[a-zA-Z0-9]+]] define void @integer_arith(i32 %arg1, i32 %arg2, i64 %arg3, i64 %arg4) { - ; CHECK-DAG: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 - ; CHECK-DAG: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 + ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 + ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 ; CHECK: llvm.add %[[ARG1]], %[[C1]] : i32 %1 = add i32 %arg1, -7 ; CHECK: llvm.add %[[C2]], %[[ARG2]] : i32 @@ -75,13 +75,13 @@ define i1 @integer_compare(i32 %arg1, i32 %arg2, <4 x i64> %arg3, <4 x i64> %arg ; CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] ; CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]] define void @fp_arith(float %arg1, float %arg2, double %arg3, double %arg4) { - ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f64) : f64 - ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f32) : f32 - ; CHECK: llvm.fadd %[[C2]], %[[ARG1]] : f32 + ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f32) : f32 + ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f64) : f64 + ; CHECK: llvm.fadd %[[C1]], %[[ARG1]] : f32 %1 = fadd float 0x403E4CCCC0000000, %arg1 ; CHECK: llvm.fadd %[[ARG1]], %[[ARG2]] : f32 %2 = fadd float %arg1, %arg2 - ; CHECK: llvm.fadd %[[C1]], %[[ARG3]] : f64 + ; CHECK: llvm.fadd %[[C2]], %[[ARG3]] : f64 %3 = fadd double 3.030000e+01, %arg3 ; CHECK: llvm.fsub %[[ARG1]], %[[ARG2]] : f32 %4 = fsub float %arg1, %arg2 @@ -212,8 +212,8 @@ define ptr addrspace(2) @addrspace_casts(ptr addrspace(1) %arg1) { ; CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] ; CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]] define void @integer_arith(i32 %arg1, i32 %arg2, i64 %arg3, i64 %arg4) { - ; CHECK-DAG: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 - ; CHECK-DAG: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 + ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 + ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 ; CHECK: llvm.add %[[ARG1]], %[[C1]] : i32 ; CHECK: llvm.add %[[C2]], %[[ARG2]] : 
i32 ; CHECK: llvm.sub %[[ARG3]], %[[ARG4]] : i64 diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index b8ea328eec6df..550203b543c59 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -1,9 +1,5 @@ ; RUN: mlir-translate -import-llvm %s | FileCheck %s -define void @intrinsics() { - ret void -} - ; CHECK-LABEL: llvm.func @fmuladd_test define void @fmuladd_test(float %0, float %1, <8 x float> %2, i8* %3) { ; CHECK: llvm.intr.fmuladd(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 @@ -131,21 +127,21 @@ define void @bitreverse_test(i32 %0, <8 x i32> %1) { } ; CHECK-LABEL: llvm.func @ctlz_test define void @ctlz_test(i32 %0, <8 x i32> %1) { - ; CHECK-DAG: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval2]]) : (i32, i1) -> i32 + ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval1]]) : (i32, i1) -> i32 %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) - ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval2]]) : (vector<8xi32>, i1) -> vector<8xi32> %4 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %1, i1 false) ret void } ; CHECK-LABEL: llvm.func @cttz_test define void @cttz_test(i32 %0, <8 x i32> %1) { - ; CHECK-DAG: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval2]]) : (i32, i1) -> i32 + ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval1]]) : (i32, i1) -> i32 %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false) - ; 
CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval2]]) : (vector<8xi32>, i1) -> vector<8xi32> %4 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %1, i1 false) ret void } @@ -340,9 +336,9 @@ define void @memcpy_test(i32 %0, i8* %1, i8* %2) { ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 ; CHECK: %[[constant:.+]] = llvm.mlir.constant(10 : i64) : i64 ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %[[falseval2]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + ; CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %[[falseval1]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 %0, i1 false) - ; CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %[[constant]], %[[falseval1]]) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + ; CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %[[constant]], %[[falseval2]]) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %1, i8* %2, i64 10, i1 false) ret void } diff --git a/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll b/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll index 3f582138b03b5..bc0e4cde45175 100644 --- a/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll +++ b/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll @@ -4,10 +4,10 @@ ; CHECK: llvm.mlir.global external @D() ; CHECK-SAME: !llvm.struct<"Domain", (ptr>>, ptr>)> -; CHECK-DAG: %[[E0:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>> -; CHECK-DAG: %[[E1:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>>> ; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"Domain", (ptr>>, ptr>)> -; CHECK: %[[CHAIN:.+]] = llvm.insertvalue %[[E1]], %[[ROOT]][0] -; CHECK: %[[RES:.+]] = llvm.insertvalue %[[E0]], %[[CHAIN]][1] +; CHECK: %[[E0:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>>> +; CHECK: %[[CHAIN:.+]] = llvm.insertvalue %[[E0]], %[[ROOT]][0] +; CHECK: 
%[[E1:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>> +; CHECK: %[[RES:.+]] = llvm.insertvalue %[[E1]], %[[CHAIN]][1] ; CHECK: llvm.return %[[RES]] @D = global %Domain zeroinitializer From 535535b91cca45f8e04989b40cb8da1bb9de1b4c Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Fri, 4 Nov 2022 15:18:07 +0100 Subject: [PATCH 508/516] Implement SPV_INTEL_tensor_float32_conversion extension (#1656) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This extension adds conversion instruction from float to tensor float (TF32) data format. TF32 uses 1 bit for a sign, 8 bits for an exponent and 10 bits for a fraction. This extension doesn’t introduce TF32 type in SPIR-V, instead instruction below uses 32-bit float type to represent TF32 value. Spec: https://github.com/intel/llvm/pull/6990 Signed-off-by: Sidorov, Dmitry Original commit: https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/ea3ddc1 --- llvm-spirv/include/LLVMSPIRVExtensions.inc | 1 + .../lib/SPIRV/libSPIRV/SPIRVInstruction.h | 58 +++++++++++++++++++ .../lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 2 + .../SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h | 1 + .../lib/SPIRV/libSPIRV/spirv_internal.hpp | 5 ++ .../convert_tensor_float32.ll | 50 ++++++++++++++++ 6 files changed, 117 insertions(+) create mode 100644 llvm-spirv/test/extensions/INTEL/SPV_INTEL_tensor_float32_conversion/convert_tensor_float32.ll diff --git a/llvm-spirv/include/LLVMSPIRVExtensions.inc b/llvm-spirv/include/LLVMSPIRVExtensions.inc index 1caaf7730b644..65b053675f074 100644 --- a/llvm-spirv/include/LLVMSPIRVExtensions.inc +++ b/llvm-spirv/include/LLVMSPIRVExtensions.inc @@ -55,3 +55,4 @@ EXT(SPV_INTEL_non_constant_addrspace_printf) EXT(SPV_INTEL_complex_float_mul_div) EXT(SPV_INTEL_split_barrier) EXT(SPV_INTEL_masked_gather_scatter) +EXT(SPV_INTEL_tensor_float32_conversion) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 
2ffd4c3aa6526..6af437b9c99ea 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -3561,6 +3561,64 @@ class SPIRVMaskedScatterINTELInst _SPIRV_OP(MaskedGather, true, 7) _SPIRV_OP(MaskedScatter, false, 5) #undef _SPIRV_OP + +template +class SPIRVTensorFloat32ConversionINTELInstBase : public SPIRVUnaryInst { +protected: + SPIRVCapVec getRequiredCapability() const override { + return getVec(internal::CapabilityTensorFloat32ConversionINTEL); + } + + llvm::Optional getRequiredExtension() const override { + return ExtensionID::SPV_INTEL_tensor_float32_conversion; + } + + void validate() const override { + SPIRVUnaryInst::validate(); + + SPIRVType *ResCompTy = this->getType(); + SPIRVWord ResCompCount = 1; + if (ResCompTy->isTypeVector()) { + ResCompCount = ResCompTy->getVectorComponentCount(); + ResCompTy = ResCompTy->getVectorComponentType(); + } + + // validate is a const method, whilst getOperand is non-const method + // because it may call a method of class Module that may modify LiteralMap + // of Module field. That modification is not impacting validate method for + // these instructions, so const_cast is safe here. 
+ using SPVTF32ConvTy = SPIRVTensorFloat32ConversionINTELInstBase; + SPIRVValue *Input = const_cast(this)->getOperand(0); + + SPIRVType *InCompTy = Input->getType(); + SPIRVWord InCompCount = 1; + if (InCompTy->isTypeVector()) { + InCompCount = InCompTy->getVectorComponentCount(); + InCompTy = InCompTy->getVectorComponentType(); + } + + auto InstName = OpCodeNameMap::map(OC); + SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + + SPVErrLog.checkError( + ResCompTy->isTypeFloat(32), SPIRVEC_InvalidInstruction, + InstName + "\nResult value must be a scalar or vector of floating-point" + " 32-bit type\n"); + SPVErrLog.checkError(InCompTy->isTypeFloat(32), SPIRVEC_InvalidInstruction, + InstName + + "\nInput value must be a scalar or vector of " + "floating-point 32-bit type\n"); + SPVErrLog.checkError( + ResCompCount == InCompCount, SPIRVEC_InvalidInstruction, + InstName + "\nInput type must have the same number of components as " + "result type\n"); + } +}; + +#define _SPIRV_OP(x) \ + typedef SPIRVTensorFloat32ConversionINTELInstBase SPIRV##x; +_SPIRV_OP(ConvertFToTF32INTEL) +#undef _SPIRV_OP } // namespace SPIRV #endif // SPIRV_LIBSPIRV_SPIRVINSTRUCTION_H diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 4520a5a4602a9..d098ff0d8b244 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -620,6 +620,8 @@ template <> inline void SPIRVMap::init() { "NonConstantAddrspacePrintfINTEL"); add(internal::CapabilityComplexFloatMulDivINTEL, "ComplexFloatMulDivINTEL"); add(internal::CapabilityMaskedGatherScatterINTEL, "MaskedGatherScatterINTEL"); + add(internal::CapabilityTensorFloat32ConversionINTEL, + "TensorFloat32ConversionINTEL"); } SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h index 
0ed0d855d5e61..9d8765b5aee5e 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h @@ -15,3 +15,4 @@ _SPIRV_OP_INTERNAL(ComplexFMulINTEL, internal::ComplexFMulINTEL) _SPIRV_OP_INTERNAL(ComplexFDivINTEL, internal::ComplexFDivINTEL) _SPIRV_OP_INTERNAL(MaskedGatherINTEL, internal::OpMaskedGatherINTEL) _SPIRV_OP_INTERNAL(MaskedScatterINTEL, internal::OpMaskedScatterINTEL) +_SPIRV_OP_INTERNAL(ConvertFToTF32INTEL, internal::ConvertFToTF32INTEL) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp b/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp index a08f59c37b5d0..3220c6ebe5092 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp @@ -46,6 +46,7 @@ enum InternalOp { IOpJointMatrixWorkItemLengthINTEL = 6410, IOpComplexFMulINTEL = 6415, IOpComplexFDivINTEL = 6416, + IOpConvertFToTF32INTEL = 6426, IOpMaskedGatherINTEL = 6428, IOpMaskedScatterINTEL = 6429, IOpPrev = OpMax - 2, @@ -81,6 +82,7 @@ enum InternalCapability { ICapGlobalVariableDecorationsINTEL = 6146, ICapabilityNonConstantAddrspacePrintfINTEL = 6411, ICapabilityComplexFloatMulDivINTEL = 6414, + ICapabilityTensorFloat32ConversionINTEL = 6425, ICapabilityMaskedGatherScatterINTEL = 6427 }; @@ -133,6 +135,9 @@ _SPIRV_OP(Op, ComplexFDivINTEL) _SPIRV_OP(Capability, MaskedGatherScatterINTEL) _SPIRV_OP(Op, MaskedGatherINTEL) _SPIRV_OP(Op, MaskedScatterINTEL) + +_SPIRV_OP(Capability, TensorFloat32ConversionINTEL) +_SPIRV_OP(Op, ConvertFToTF32INTEL) #undef _SPIRV_OP constexpr Op OpForward = static_cast(IOpForward); diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_tensor_float32_conversion/convert_tensor_float32.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_tensor_float32_conversion/convert_tensor_float32.ll new file mode 100644 index 0000000000000..1f0270694178f --- /dev/null +++ 
b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_tensor_float32_conversion/convert_tensor_float32.ll @@ -0,0 +1,50 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-ext=+SPV_INTEL_tensor_float32_conversion +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.spv -o %t.rev.bc -r -emit-opaque-pointers --spirv-target-env=SPV-IR +; RUN: llvm-dis %t.rev.bc -o %t.rev.ll +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM + +; RUN: not llvm-spirv %t.bc 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: RequiresExtension: Feature requires the following SPIR-V extension: +; CHECK-ERROR-NEXT: SPV_INTEL_tensor_float32_conversion + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +; CHECK-SPIRV: Capability TensorFloat32ConversionINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_tensor_float32_conversion" +; CHECK-SPIRV: TypeFloat [[#FP32Ty:]] 32 +; CHECK-SPIRV: TypeVector [[#FP32v8Ty:]] [[#FP32Ty]] 8 +; CHECK-SPIRV: Constant [[#FP32Ty]] [[#CONST:]] 1065353216 + +; CHECK-SPIRV: FunctionParameter [[#FP32Ty]] [[FP32ValId:.*]] +; CHECK-SPIRV: FunctionParameter [[#FP32v8Ty]] [[FP32v8ValId:.*]] + +; CHECK-SPIRV: ConvertFToTF32INTEL [[#FP32Ty]] [[#]] [[FP32ValId]] +; CHECK-SPIRV: ConvertFToTF32INTEL [[#FP32v8Ty]] [[#]] [[FP32v8ValId]] +; CHECK-SPIRV: ConvertFToTF32INTEL [[#FP32Ty]] [[#]] [[#CONST]] + +; CHECK-LLVM: call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float +; CHECK-LLVM: call spir_func <8 x float> @_Z27__spirv_ConvertFToTF32INTELDv8_f(<8 x float> +; CHECK-LLVM: call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float 1.000000e+00) + +define spir_func void @_Z2opffv8(float %a, <8 x float> %in) { + %1 = tail call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float %a) + %2 = tail call spir_func <8 x float> @_Z27__spirv_ConvertFToTF32INTELDv8_f(<8 
x float> %in) + %3 = tail call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float 1.000000e+00) + ret void +} + +declare spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float) + +declare spir_func <8 x float> @_Z27__spirv_ConvertFToTF32INTELDv8_f(<8 x float>) + +!opencl.spir.version = !{!0} +!spirv.Source = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{!"clang version 16.0.0"} From 9e51f5beb1f21d6f44b7f9dfd95cbae6de896f0e Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Tue, 8 Nov 2022 10:40:39 +0100 Subject: [PATCH 509/516] Translate readnone attribute as function parameter attribute (#1697) Community restricted readnone, readonly and writeonly attributes to be only function parameter attributes. This patch aligns the translator with llvm.org. It also fixes a bug, when readnone attribute is being mapped to NoWrite SPIR-V function parameter attribute. Signed-off-by: Sidorov, Dmitry Signed-off-by: Sidorov, Dmitry Original commit: https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/c1fe5fe --- llvm-spirv/lib/SPIRV/SPIRVInternal.h | 3 +-- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 4 +++- llvm-spirv/lib/SPIRV/SPIRVUtil.cpp | 4 +++- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 4 +++- .../transcoding/OpGenericPtrMemSemantics.ll | 4 ++-- .../transcoding/OpImageSampleExplicitLod.ll | 4 ++-- llvm-spirv/test/transcoding/OpSwitch32.ll | 6 ++--- llvm-spirv/test/transcoding/OpSwitch64.ll | 6 ++--- llvm-spirv/test/transcoding/bitcast.ll | 6 ++--- llvm-spirv/test/transcoding/builtin_calls.ll | 2 +- .../builtin_function_readnone_attr.ll | 23 ++++++++++++------- .../transcoding/builtin_vars_arithmetics.ll | 2 +- llvm-spirv/test/transcoding/isequal.ll | 8 +++---- llvm-spirv/test/transcoding/unreachable.ll | 4 ++-- 14 files changed, 46 insertions(+), 34 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h index bd41494983659..60500f399b77c 100644 --- 
a/llvm-spirv/lib/SPIRV/SPIRVInternal.h +++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h @@ -236,6 +236,7 @@ inline void SPIRVMap::init() { add(Attribute::NoAlias, FunctionParameterAttributeNoAlias); add(Attribute::NoCapture, FunctionParameterAttributeNoCapture); add(Attribute::ReadOnly, FunctionParameterAttributeNoWrite); + add(Attribute::ReadNone, FunctionParameterAttributeNoReadWrite); } typedef SPIRVMap SPIRSPIRVFuncParamAttrMap; @@ -243,8 +244,6 @@ typedef SPIRVMap template <> inline void SPIRVMap::init() { - add(Attribute::ReadNone, FunctionControlPureMask); - add(Attribute::ReadOnly, FunctionControlConstMask); add(Attribute::AlwaysInline, FunctionControlInlineMask); add(Attribute::NoInline, FunctionControlDontInlineMask); add(Attribute::OptimizeNone, internal::FunctionControlOptNoneINTELMask); diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index a0ff677dfde94..b31af9f60c00b 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -4378,7 +4378,9 @@ Instruction *SPIRVToLLVM::transOCLBuiltinFromExtInst(SPIRVExtInst *BC, if (isFuncNoUnwind()) F->addFnAttr(Attribute::NoUnwind); if (isFuncReadNone(UnmangledName)) - F->addFnAttr(Attribute::ReadNone); + for (llvm::Argument &Arg : F->args()) + if (Arg.getType()->isPointerTy()) + Arg.addAttr(Attribute::ReadNone); } auto Args = transValue(BC->getArgValues(), F, BB); SPIRVDBG(dbgs() << "[transOCLBuiltinFromExtInst] Function: " << *F diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index fd222fbdca2e3..b5351ceead32f 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -2065,8 +2065,10 @@ bool lowerBuiltinVariableToCall(GlobalVariable *GV, Func = Function::Create(FT, GlobalValue::ExternalLinkage, MangledName, M); Func->setCallingConv(CallingConv::SPIR_FUNC); Func->addFnAttr(Attribute::NoUnwind); - Func->addFnAttr(Attribute::ReadNone); Func->addFnAttr(Attribute::WillReturn); 
+ for (llvm::Argument &Arg : Func->args()) + if (Arg.getType()->isPointerTy()) + Arg.addAttr(Attribute::ReadNone); } // Collect instructions in these containers to remove them later. diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 8d3245ec8bffa..56ece5aad8c9f 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -849,8 +849,10 @@ SPIRVFunction *LLVMToSPIRVBase::transFunctionDecl(Function *F) { BA->addAttr(FunctionParameterAttributeNoCapture); if (I->hasStructRetAttr()) BA->addAttr(FunctionParameterAttributeSret); - if (I->onlyReadsMemory()) + if (Attrs.hasParamAttr(ArgNo, Attribute::ReadOnly)) BA->addAttr(FunctionParameterAttributeNoWrite); + if (Attrs.hasParamAttr(ArgNo, Attribute::ReadNone)) + BA->addAttr(FunctionParameterAttributeNoReadWrite); if (Attrs.hasParamAttr(ArgNo, Attribute::ZExt)) BA->addAttr(FunctionParameterAttributeZext); if (Attrs.hasParamAttr(ArgNo, Attribute::SExt)) diff --git a/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll b/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll index 8b7545e7aedff..9da8e260d448d 100644 --- a/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll +++ b/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll @@ -24,7 +24,7 @@ target triple = "spir-unknown-unknown" @gint = addrspace(1) global i32 1, align 4 -; Function Attrs: nounwind readnone +; Function Attrs: nounwind define spir_func i32 @isFenceValid(i32 %fence) #0 { entry: %switch = icmp ult i32 %fence, 4 @@ -66,7 +66,7 @@ entry: declare spir_func i32 @_Z13get_global_idj(i32) #2 -attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { nounwind } diff --git a/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll b/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll index c214ff9dceb5e..b41b7d07c455e 100644 --- a/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll +++ b/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll @@ -43,14 +43,14 @@ entry: ; Function Attrs: nounwind declare spir_func float @_Z11read_imagef20ocl_image2d_depth_ro11ocl_samplerDv2_i(%opencl.image2d_depth_ro_t addrspace(1)*, i32, <2 x i32>) #0 -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i32 @_Z13get_global_idj(i32) #1 ; Function Attrs: nounwind declare spir_func <2 x i32> @_Z13get_image_dim20ocl_image2d_depth_ro(%opencl.image2d_depth_ro_t addrspace(1)*) #0 attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } +attributes #1 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/OpSwitch32.ll b/llvm-spirv/test/transcoding/OpSwitch32.ll index 1dd9337fd996f..718dbe29fe25e 100644 --- a/llvm-spirv/test/transcoding/OpSwitch32.ll +++ b/llvm-spirv/test/transcoding/OpSwitch32.ll @@ -75,12 +75,12 @@ sw.epilog: ; preds = %entry, %sw.bb1, %sw ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 attributes #0 = { nounwind 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/OpSwitch64.ll b/llvm-spirv/test/transcoding/OpSwitch64.ll index 54d396627d0eb..91dfc30536630 100644 --- a/llvm-spirv/test/transcoding/OpSwitch64.ll +++ b/llvm-spirv/test/transcoding/OpSwitch64.ll @@ -86,12 +86,12 @@ sw.epilog: ; preds = %entry, %sw.bb3, %sw ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/bitcast.ll b/llvm-spirv/test/transcoding/bitcast.ll index 1ab279f7b539e..0e6aa95ca295f 100644 --- a/llvm-spirv/test/transcoding/bitcast.ll +++ b/llvm-spirv/test/transcoding/bitcast.ll @@ -26,12 +26,12 @@ entry: ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/builtin_calls.ll b/llvm-spirv/test/transcoding/builtin_calls.ll index 8c9d7b6aa9c2d..a5d6c1eae77ea 100644 --- a/llvm-spirv/test/transcoding/builtin_calls.ll +++ b/llvm-spirv/test/transcoding/builtin_calls.ll @@ -16,7 +16,7 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: Variable {{[0-9]+}} [[Id:[0-9]+]] ; CHECK-SPIRV: Variable {{[0-9]+}} [[Id:[0-9]+]] -; Function Attrs: nounwind readnone +; Function Attrs: nounwind define spir_kernel void @f() #0 !kernel_arg_addr_space !0 !kernel_arg_access_qual !0 
!kernel_arg_type !0 !kernel_arg_base_type !0 !kernel_arg_type_qual !0 { entry: %0 = call spir_func i32 @_Z29__spirv_BuiltInGlobalLinearIdv() diff --git a/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll b/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll index 07881972b4e28..c99098fa33908 100644 --- a/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll +++ b/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll @@ -1,13 +1,23 @@ ; RUN: llvm-as %s -o %t.bc ; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM +; CHECK-SPIRV: Name [[#A:]] "a" +; CHECK-SPIRV: Name [[#B:]] "b" +; CHECK-SPIRV: Decorate [[#A]] FuncParamAttr 5 +; CHECK-SPIRV: Decorate [[#A]] FuncParamAttr 6 +; CHECK-SPIRV: Decorate [[#B]] FuncParamAttr 7 + +; CHECK-LLVM: {{.*}}void @test_builtin_readnone(ptr nocapture readonly %{{.*}}, ptr nocapture readnone %{{.*}}) + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "spir-unknown-unknown" ; Function Attrs: convergent nofree norecurse nounwind uwtable -define dso_local spir_kernel void @test_builtin_readnone(double* nocapture readonly %a, double* nocapture %b) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +define dso_local spir_kernel void @test_builtin_readnone(double* nocapture readonly %a, double* nocapture readnone %b) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { entry: %0 = load double, double* %a, align 8, !tbaa !7 %call = tail call double @_Z3expd(double %0) #2 @@ -18,18 +28,15 @@ entry: ret void } -; Function Attrs: convergent nounwind readnone 
-; CHECK-LLVM: declare{{.*}}@_Z3expd{{.*}}#[[#Attrs:]] +; Function Attrs: convergent nounwind declare dso_local double @_Z3expd(double) local_unnamed_addr #1 -; Function Attrs: convergent nounwind readnone -; CHECK-LLVM: declare{{.*}}@_Z3cosd{{.*}}#[[#Attrs]] +; Function Attrs: convergent nounwind declare dso_local double @_Z3cosd(double) local_unnamed_addr #1 attributes #0 = { convergent nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -; CHECK-LLVM: attributes #[[#Attrs]] {{.*}} readnone -attributes #1 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind readnone } +attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent 
nounwind } !llvm.module.flags = !{!0} !opencl.ocl.version = !{!1} diff --git a/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll b/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll index b36298489f62b..3b50852f54390 100644 --- a/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll +++ b/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll @@ -123,7 +123,7 @@ entry: attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -; CHECK-LLVM-OCL: attributes #1 = { nounwind readnone willreturn } +; CHECK-LLVM-OCL: attributes #1 = { nounwind willreturn } !llvm.module.flags = !{!0} !opencl.spir.version = !{!1} diff --git a/llvm-spirv/test/transcoding/isequal.ll b/llvm-spirv/test/transcoding/isequal.ll index a49f2fe942e55..f1e363ddc7d12 100644 --- a/llvm-spirv/test/transcoding/isequal.ll +++ b/llvm-spirv/test/transcoding/isequal.ll @@ -30,15 +30,15 @@ entry: ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func <8 x i32> @_Z7isequalDv8_fDv8_f(<8 x float>, <8 x float>) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/unreachable.ll b/llvm-spirv/test/transcoding/unreachable.ll index 6193648f78928..078681fbd6a05 100644 --- a/llvm-spirv/test/transcoding/unreachable.ll +++ b/llvm-spirv/test/transcoding/unreachable.ll @@ -30,11 +30,11 @@ define spir_kernel void @unreachable_simple(i32 addrspace(1)* nocapture %in, i32 ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } +attributes #1 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !spirv.Source = !{!6} From bdc598856d7ce4948768bbe7f1aa4ce0693b3670 Mon Sep 17 00:00:00 2001 From: Joshua Cranmer Date: Tue, 8 Nov 2022 04:46:56 -0500 Subject: [PATCH 510/516] Use TypedPointerType in more places. (#1683) One of the reasons behind this change is to make code more easily deal with the future prospect of opaque types, so that helper methods (like adjusting image types) can handle both pointer type and opaque type representations simply by querying if the input type is a TypedPointerType or an OpaqueType [name for the latter still pending]. The set of changes are: * OCLTypeToSPIRV now uses TypedPointerType internally * adaptSPIRVImageType and getSPIRVStructTypeByChangeBaseTypeName are collapsed into one method (adjustImageType) that works with TypedPointerTypes. * A few is*StructType methods have been reverted back to is*Type methods, taking a TypedPointerType parameter instead. 
* BuiltinCallHelper::addSPIRVCall{Pair} allows for the creation of SPIR-V calls that can use TypedPointerType or actual type for parameters and return value. * BuiltinCallHelper::getCallValue{Type} is a simple helper that hides many of the uses of getParameterTypes. * The Type* parameter of the callback in BuiltinCallMutator::mapArg now provides a TypedPointerType or the actual type, instead of the pointer element type. * BuiltinCallMutator::ValueTypePair similarly takes a TypedPointerType or the actual type. * getParameterTypes (when passed a SmallVector) does a similar thing. (The SmallVector variant has been removed in favor of only using the other one.) Note that the last few changes do change the semantics of function parameters without changing the function name or signature. Co-authored-by: Dmitry Sidorov Original commit: https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/5ce6a99 --- llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp | 104 ++++++++----------- llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp | 52 +++++----- llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h | 15 ++- llvm-spirv/lib/SPIRV/OCLUtil.cpp | 9 +- llvm-spirv/lib/SPIRV/OCLUtil.h | 2 +- llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp | 107 ++++++++++++++++---- llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h | 65 ++++++++++-- llvm-spirv/lib/SPIRV/SPIRVInternal.h | 38 +++---- llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp | 73 ++++--------- llvm-spirv/lib/SPIRV/SPIRVToOCL.h | 2 - llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp | 23 +++-- llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp | 10 +- llvm-spirv/lib/SPIRV/SPIRVUtil.cpp | 99 +++++++----------- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 59 +++++------ llvm-spirv/lib/SPIRV/SPIRVWriter.h | 3 +- 15 files changed, 341 insertions(+), 320 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp index ea963743c536d..ae3611e814097 100644 --- a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp @@ -944,27 +944,24 @@ void 
OCLToSPIRVBase::visitCallReadImageWithSampler(CallInst *CI, assert(CI->getCalledFunction() && "Unexpected indirect call"); Function *Func = CI->getCalledFunction(); bool IsRetScalar = !CI->getType()->isVectorTy(); - SmallVector ArgStructTys; - getParameterTypes(CI, ArgStructTys); Type *Ret = CI->getType(); - auto *ImageTy = OCLTypeToSPIRVPtr->getAdaptedArgumentType(Func, 0).second; + auto *ImageTy = OCLTypeToSPIRVPtr->getAdaptedArgumentType(Func, 0); if (!ImageTy) - ImageTy = ArgStructTys[0]; - ImageTy = adaptSPIRVImageType(M, ImageTy); - auto *SampledImgStructTy = getSPIRVStructTypeByChangeBaseTypeName( - M, ImageTy, kSPIRVTypeName::Image, kSPIRVTypeName::SampledImg); - auto *SampledImgTy = PointerType::get(SampledImgStructTy, SPIRAS_Global); - Value *SampledImgArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1)}; - auto *SampledImg = addCallInstSPIRV(M, getSPIRVFuncName(OpSampledImage), - SampledImgTy, SampledImgArgs, nullptr, - {ArgStructTys[0], ArgStructTys[1]}, CI, - kSPIRVName::TempSampledImage); + ImageTy = getCallValueType(CI, 0); auto Mutator = mutateCallInst( CI, getSPIRVFuncName(OpImageSampleExplicitLod, std::string(kSPIRVPostfix::ExtDivider) + getPostfixForReturnType(Ret))); - Mutator.replaceArg(0, {SampledImg, SampledImgStructTy}).removeArg(1); + Mutator.mapArg(0, [&](IRBuilder<> &Builder, Value *ImgArg, Type *ImgType) { + auto *SampledImgTy = adjustImageType(ImageTy, kSPIRVTypeName::Image, + kSPIRVTypeName::SampledImg); + Value *SampledImgArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1)}; + return addSPIRVCallPair(Builder, OpSampledImage, SampledImgTy, + SampledImgArgs, {ImgType, Mutator.getType(1)}, + kSPIRVName::TempSampledImage); + }); + Mutator.removeArg(1); unsigned ImgOpMask = getImageSignZeroExt(DemangledName); unsigned ImgOpMaskInsIndex = Mutator.arg_size(); switch (Mutator.arg_size()) { @@ -997,15 +994,7 @@ void OCLToSPIRVBase::visitCallReadImageWithSampler(CallInst *CI, void OCLToSPIRVBase::visitCallGetImageSize(CallInst *CI, StringRef 
DemangledName) { - StringRef TyName; - SmallVector SubStrs; - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - auto IsImg = isOCLImageStructType(ParamTys[0], &TyName); - (void)IsImg; - assert(IsImg); - std::string ImageTyName = getImageBaseTypeName(TyName); - auto Desc = map(ImageTyName); + auto Desc = getImageDescriptor(getCallValueType(CI, 0)); unsigned Dim = getImageDimension(Desc.Dim) + Desc.Arrayed; assert(Dim > 0 && "Invalid image dimension."); assert(CI->arg_size() == 1); @@ -1131,8 +1120,10 @@ void OCLToSPIRVBase::visitCallToAddr(CallInst *CI, StringRef DemangledName) { Mutator .mapArg(Mutator.arg_size() - 1, [&](Value *V) { - return std::pair( - castToInt8Ptr(V, CI), Type::getInt8Ty(V->getContext())); + return std::make_pair( + castToInt8Ptr(V, CI), + TypedPointerType::get(Type::getInt8Ty(V->getContext()), + SPIRAS_Generic)); }) .appendArg(StorageClass); }; @@ -1497,9 +1488,7 @@ void OCLToSPIRVBase::processSubgroupBlockReadWriteINTEL( // reads and vector block reads. void OCLToSPIRVBase::visitSubgroupBlockReadINTEL(CallInst *CI) { OCLBuiltinTransInfo Info; - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - if (isOCLImageStructType(ParamTys[0])) + if (isOCLImageType(getCallValueType(CI, 0))) Info.UniqName = getSPIRVFuncName(spv::OpSubgroupImageBlockReadINTEL); else Info.UniqName = getSPIRVFuncName(spv::OpSubgroupBlockReadINTEL); @@ -1512,9 +1501,7 @@ void OCLToSPIRVBase::visitSubgroupBlockReadINTEL(CallInst *CI) { // instructions. 
void OCLToSPIRVBase::visitSubgroupBlockWriteINTEL(CallInst *CI) { OCLBuiltinTransInfo Info; - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - if (isOCLImageStructType(ParamTys[0])) + if (isOCLImageType(getCallValueType(CI, 0))) Info.UniqName = getSPIRVFuncName(spv::OpSubgroupImageBlockWriteINTEL); else Info.UniqName = getSPIRVFuncName(spv::OpSubgroupBlockWriteINTEL); @@ -1614,7 +1601,7 @@ void OCLToSPIRVBase::visitSubgroupAVCWrapperBuiltinCall( std::string MCETName = std::string(kOCLSubgroupsAVCIntel::TypePrefix) + "mce_" + TyKind + "_t"; auto *MCESTy = getSubgroupAVCIntelMCEType(M, MCETName); - auto *MCETy = PointerType::get(MCESTy, SPIRAS_Private); + auto *MCETy = TypedPointerType::get(MCESTy, SPIRAS_Private); std::string ToMCEFName = Prefix + OpKind + "_convert_to_mce_" + TyKind; Op ToMCEOC = OpNop; OCLSPIRVSubgroupAVCIntelBuiltinMap::find(ToMCEFName, &ToMCEOC); @@ -1631,28 +1618,24 @@ void OCLToSPIRVBase::visitSubgroupAVCWrapperBuiltinCall( mutateCallInst(CI, WrappedOC) .mapArg(CI->arg_size() - 1, - [&](Value *Arg, Type *ParamTy) { + [&](IRBuilder<> &Builder, Value *Arg, Type *ParamTy) { // Create conversion function call for the last operand - return std::pair( - addCallInstSPIRV(M, getSPIRVFuncName(ToMCEOC), MCETy, Arg, - nullptr, {ParamTy}, CI, ""), - MCESTy); + return addSPIRVCallPair(Builder, ToMCEOC, MCETy, {Arg}, + {ParamTy}); }) - .changeReturnType(MCETy, [=](IRBuilder<> &, CallInst *NewCI) { + .changeReturnType(MCETy, [&](IRBuilder<> &Builder, CallInst *NewCI) { // Create conversion function call for the return result - return addCallInstSPIRV(M, getSPIRVFuncName(FromMCEOC), CI->getType(), - NewCI, nullptr, {MCESTy}, CI, ""); + return addSPIRVCall(Builder, FromMCEOC, CI->getType(), {NewCI}, + {MCETy}); }); } else { // Wrapper built-ins which take the 'result_t' argument requires only one // conversion for the argument mutateCallInst(CI, WrappedOC) - .mapArg(CI->arg_size() - 1, [&](Value *Arg, Type *ParamTy) { + .mapArg(CI->arg_size() - 1, 
[&](IRBuilder<> &Builder, Value *Arg, + Type *ParamTy) { // Create conversion function call for the last operand - return std::pair( - addCallInstSPIRV(M, getSPIRVFuncName(ToMCEOC), MCETy, Arg, - nullptr, {ParamTy}, CI, ""), - MCESTy); + return addSPIRVCallPair(Builder, ToMCEOC, MCETy, {Arg}, {ParamTy}); }); } } @@ -1676,9 +1659,8 @@ void OCLToSPIRVBase::visitSubgroupAVCBuiltinCallWithSampler( return; // this is not a VME built-in SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - auto *TyIt = - std::find_if(ParamTys.begin(), ParamTys.end(), isSamplerStructTy); + getParameterTypes(CI->getCalledFunction(), ParamTys); + auto *TyIt = std::find_if(ParamTys.begin(), ParamTys.end(), isSamplerTy); assert(TyIt != ParamTys.end() && "Invalid Subgroup AVC Intel built-in call"); unsigned SamplerIndex = TyIt - ParamTys.begin(); Value *SamplerVal = CI->getOperand(SamplerIndex); @@ -1687,30 +1669,24 @@ void OCLToSPIRVBase::visitSubgroupAVCBuiltinCallWithSampler( SmallVector AdaptedTys; for (unsigned I = 0; I < CI->arg_size(); I++) AdaptedTys.push_back( - OCLTypeToSPIRVPtr->getAdaptedArgumentType(CI->getCalledFunction(), I) - .second); + OCLTypeToSPIRVPtr->getAdaptedArgumentType(CI->getCalledFunction(), I)); auto *AdaptedIter = AdaptedTys.begin(); mutateCallInst(CI, OC) - .mapArgs([&](Value *Arg, Type *PointerTy) { - if (!isOCLImageStructType(PointerTy)) - return std::make_pair(Arg, PointerTy); + .mapArgs([&](IRBuilder<> &Builder, Value *Arg, Type *ArgTy) { + if (!isOCLImageType(ArgTy)) + return BuiltinCallMutator::ValueTypePair(Arg, ArgTy); auto *ImageTy = *AdaptedIter++; if (!ImageTy) - ImageTy = PointerTy; - ImageTy = adaptSPIRVImageType(M, ImageTy); - auto *SampledImgStructTy = getSPIRVStructTypeByChangeBaseTypeName( - M, ImageTy, kSPIRVTypeName::Image, kSPIRVTypeName::VmeImageINTEL); - auto *SampledImgTy = - PointerType::get(SampledImgStructTy, SPIRAS_Global); + ImageTy = ArgTy; + auto *SampledImgTy = adjustImageType(ImageTy, kSPIRVTypeName::Image, + 
kSPIRVTypeName::VmeImageINTEL); Value *SampledImgArgs[] = {Arg, SamplerVal}; - return std::pair( - addCallInstSPIRV(M, getSPIRVFuncName(OpVmeImageINTEL), SampledImgTy, - SampledImgArgs, nullptr, {PointerTy, SamplerTy}, - CI, kSPIRVName::TempSampledImage), - SampledImgStructTy); + return addSPIRVCallPair(Builder, OpVmeImageINTEL, SampledImgTy, + SampledImgArgs, {ArgTy, SamplerTy}, + kSPIRVName::TempSampledImage); }) .removeArg(SamplerIndex); } diff --git a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp index ce0d6ac0de8c6..d742944bb2697 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp @@ -106,12 +106,11 @@ bool OCLTypeToSPIRVBase::runOCLTypeToSPIRV(Module &Module) { return false; } -void OCLTypeToSPIRVBase::addAdaptedType(Value *V, Type *Ty, - unsigned AddrSpace) { +void OCLTypeToSPIRVBase::addAdaptedType(Value *V, Type *Ty) { LLVM_DEBUG(dbgs() << "[add adapted type] "; V->printAsOperand(dbgs(), true, M); dbgs() << " => " << *Ty << '\n'); - AdaptedTy[V] = {Ty, AddrSpace}; + AdaptedTy[V] = Ty; } void OCLTypeToSPIRVBase::addWork(Function *F) { @@ -133,17 +132,16 @@ void OCLTypeToSPIRVBase::adaptFunction(Function *F) { auto Loc = AdaptedTy.find(&I); auto Found = (Loc != AdaptedTy.end()); Changed |= Found; - ArgTys.push_back(Found ? Loc->second.first : I.getType()); + ArgTys.push_back(Found ? 
Loc->second : I.getType()); if (Found) { - auto *Ty = Loc->second.first; - unsigned AddrSpace = Loc->second.second; + Type *Ty = Loc->second; for (auto &U : I.uses()) { if (auto *CI = dyn_cast(U.getUser())) { auto ArgIndex = CI->getArgOperandNo(&U); auto CF = CI->getCalledFunction(); if (AdaptedTy.count(CF) == 0) { - addAdaptedType(CF->getArg(ArgIndex), Ty, AddrSpace); + addAdaptedType(CF->getArg(ArgIndex), Ty); addWork(CF); } } @@ -156,7 +154,7 @@ void OCLTypeToSPIRVBase::adaptFunction(Function *F) { auto FT = F->getFunctionType(); FT = FunctionType::get(FT->getReturnType(), ArgTys, FT->isVarArg()); - addAdaptedType(F, FT, 0); + addAdaptedType(F, TypedPointerType::get(FT, 0)); } // Handle functions with sampler arguments that don't get called by @@ -181,7 +179,8 @@ void OCLTypeToSPIRVBase::adaptArgumentsBySamplerUse(Module &M) { AdaptedTy.count(SamplerArg) != 0) // Already traced this, move on. continue; - addAdaptedType(SamplerArg, getSamplerStructType(&M), SPIRAS_Constant); + addAdaptedType(SamplerArg, TypedPointerType::get(getSamplerStructType(&M), + SPIRAS_Constant)); auto Caller = cast(SamplerArg)->getParent(); addWork(Caller); TraceArg(Caller, cast(SamplerArg)->getArgNo()); @@ -209,15 +208,16 @@ void OCLTypeToSPIRVBase::adaptFunctionArguments(Function *F) { bool Changed = false; auto Arg = F->arg_begin(); SmallVector ParamTys; - getParameterTypes(F, ParamTys); // If we couldn't get any information from demangling, there is nothing that // can be done. 
- if (ParamTys.empty()) + if (!getParameterTypes(F, ParamTys)) return; for (unsigned I = 0; I < F->arg_size(); ++I, ++Arg) { - StructType *NewTy = dyn_cast_or_null(ParamTys[I]); + StructType *NewTy = nullptr; + if (auto *TPT = dyn_cast(ParamTys[I])) + NewTy = dyn_cast_or_null(TPT->getElementType()); if (NewTy && NewTy->isOpaque()) { auto STName = NewTy->getStructName(); if (!hasAccessQualifiedName(STName)) @@ -225,10 +225,10 @@ void OCLTypeToSPIRVBase::adaptFunctionArguments(Function *F) { if (STName.startswith(kSPR2TypeName::ImagePrefix)) { auto Ty = STName.str(); auto AccStr = getAccessQualifierFullName(Ty); - addAdaptedType( - &*Arg, - getOrCreateOpaqueStructType(M, mapOCLTypeNameToSPIRV(Ty, AccStr)), - SPIRAS_Global); + addAdaptedType(&*Arg, TypedPointerType::get( + getOrCreateOpaqueStructType( + M, mapOCLTypeNameToSPIRV(Ty, AccStr)), + SPIRAS_Global)); Changed = true; } } @@ -249,7 +249,8 @@ void OCLTypeToSPIRVBase::adaptArgumentsByMetadata(Function *F) { for (unsigned I = 0, E = TypeMD->getNumOperands(); I != E; ++I, ++Arg) { auto OCLTyStr = getMDOperandAsString(TypeMD, I); if (OCLTyStr == OCL_TYPE_NAME_SAMPLER_T) { - addAdaptedType(&(*Arg), getSamplerStructType(M), SPIRAS_Constant); + addAdaptedType(&(*Arg), TypedPointerType::get(getSamplerStructType(M), + SPIRAS_Constant)); Changed = true; } else if (OCLTyStr.startswith("image") && OCLTyStr.endswith("_t")) { auto Ty = (Twine("opencl.") + OCLTyStr).str(); @@ -257,10 +258,10 @@ void OCLTypeToSPIRVBase::adaptArgumentsByMetadata(Function *F) { auto AccMD = F->getMetadata(SPIR_MD_KERNEL_ARG_ACCESS_QUAL); assert(AccMD && "Invalid access qualifier metadata"); auto AccStr = getMDOperandAsString(AccMD, I); - addAdaptedType( - &(*Arg), - getOrCreateOpaqueStructType(M, mapOCLTypeNameToSPIRV(Ty, AccStr)), - SPIRAS_Global); + addAdaptedType(&(*Arg), TypedPointerType::get( + getOrCreateOpaqueStructType( + M, mapOCLTypeNameToSPIRV(Ty, AccStr)), + SPIRAS_Global)); Changed = true; } } @@ -297,15 +298,12 @@ void 
OCLTypeToSPIRVBase::adaptArgumentsByMetadata(Function *F) { // opencl data type x and access qualifier y, and use opencl.image_x.y to // represent image_x type with access qualifier y. // -std::pair -OCLTypeToSPIRVBase::getAdaptedArgumentType(Function *F, unsigned ArgNo) { +Type *OCLTypeToSPIRVBase::getAdaptedArgumentType(Function *F, unsigned ArgNo) { Value *Arg = F->getArg(ArgNo); auto Loc = AdaptedTy.find(Arg); if (Loc == AdaptedTy.end()) - return {nullptr, nullptr}; - Type *PointeeTy = Loc->second.first; - Type *PointerTy = PointerType::get(PointeeTy, Loc->second.second); - return {PointerTy, PointeeTy}; + return nullptr; + return Loc->second; } } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h index 17d3e7dc4a2a3..b0034acfda930 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h @@ -59,25 +59,22 @@ class OCLTypeToSPIRVBase { bool runOCLTypeToSPIRV(llvm::Module &M); - /// Returns the adapted type of the corresponding argument for a function. - /// The first value of the returned pair is the LLVM type of the argument. - /// The second value of the returned pair is the pointer element type of the - /// argument, if the type is a pointer. - std::pair - getAdaptedArgumentType(llvm::Function *F, unsigned ArgNo); + /// Returns the adapted type of the corresponding argument for a function. If + /// the type is a pointer type, it will return a TypedPointerType instead. 
+ llvm::Type *getAdaptedArgumentType(llvm::Function *F, unsigned ArgNo); private: llvm::Module *M; llvm::LLVMContext *Ctx; - // Map of argument/Function -> {pointee type, address space} - std::map> AdaptedTy; + // Map of argument/Function -> adapted type (probably TypedPointerType) + std::map AdaptedTy; std::set WorkSet; // Functions to be adapted void adaptFunctionArguments(llvm::Function *F); void adaptArgumentsByMetadata(llvm::Function *F); void adaptArgumentsBySamplerUse(llvm::Module &M); void adaptFunction(llvm::Function *F); - void addAdaptedType(llvm::Value *V, llvm::Type *PointeeTy, unsigned AS); + void addAdaptedType(llvm::Value *V, llvm::Type *Ty); void addWork(llvm::Function *F); }; diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index d83e36e4a6f5c..5be31a328e036 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -1336,9 +1336,12 @@ Value *unwrapSpecialTypeInitializer(Value *V) { return nullptr; } -bool isSamplerStructTy(Type *Ty) { - auto *STy = dyn_cast_or_null(Ty); - return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler; +bool isSamplerTy(Type *Ty) { + if (auto *TPT = dyn_cast_or_null(Ty)) { + auto *STy = dyn_cast_or_null(TPT->getElementType()); + return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler; + } + return false; } bool isPipeOrAddressSpaceCastBI(const StringRef MangledName) { diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.h b/llvm-spirv/lib/SPIRV/OCLUtil.h index bd4f6dcfc217e..9eb3166571c6a 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.h +++ b/llvm-spirv/lib/SPIRV/OCLUtil.h @@ -499,7 +499,7 @@ bool isEnqueueKernelBI(const StringRef MangledName); bool isKernelQueryBI(const StringRef MangledName); /// Check that the type is the sampler_t -bool isSamplerStructTy(Type *Ty); +bool isSamplerTy(Type *Ty); // Checks if the binary operator is an unfused fmul + fadd instruction. 
bool isUnfusedMulAdd(BinaryOperator *B); diff --git a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp index fae86366f40d1..915de2a01aaee 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp @@ -64,9 +64,15 @@ BuiltinCallMutator::BuiltinCallMutator( : CI(CI), FuncName(FuncName), Attrs(CI->getCalledFunction()->getAttributes()), ReturnTy(CI->getType()), Args(CI->args()), Rules(Rules), Builder(CI) { - getParameterTypes(CI->getCalledFunction(), PointerTypes, - std::move(NameMapFn)); - PointerTypes.resize(Args.size(), nullptr); + bool DidDemangle = getParameterTypes(CI->getCalledFunction(), PointerTypes, + std::move(NameMapFn)); + if (!DidDemangle) { + // TODO: PipeBlocking.ll causes demangling failures. + // assert(isNonMangledOCLBuiltin(CI->getCalledFunction()->getName()) && + // "SPIR-V builtin functions should be mangled"); + for (Value *Arg : Args) + PointerTypes.push_back(Arg->getType()); + } } BuiltinCallMutator::BuiltinCallMutator(BuiltinCallMutator &&Other) @@ -84,9 +90,15 @@ Value *BuiltinCallMutator::doConversion() { assert(CI && "Need to have a call instruction to do the conversion"); auto Mangler = makeMangler(CI, Rules); for (unsigned I = 0; I < Args.size(); I++) { - Mangler->getTypeMangleInfo(I).PointerTy = PointerTypes[I]; + Mangler->getTypeMangleInfo(I).PointerTy = + dyn_cast(PointerTypes[I]); } assert(Attrs.getNumAttrSets() <= Args.size() + 2 && "Too many attributes?"); + + // Sanitize the return type, in case it's a TypedPointerType. 
+ if (auto *TPT = dyn_cast(ReturnTy)) + ReturnTy = PointerType::get(TPT->getElementType(), TPT->getAddressSpace()); + CallInst *NewCall = Builder.Insert(addCallInst(CI->getModule(), FuncName, ReturnTy, Args, &Attrs, nullptr, Mangler.get())); @@ -110,7 +122,7 @@ BuiltinCallMutator &BuiltinCallMutator::setArgs(ArrayRef NewArgs) { assert(!Arg->getType()->isPointerTy() && "Cannot use this signature with pointer types"); Args.push_back(Arg); - PointerTypes.emplace_back(); + PointerTypes.push_back(Arg->getType()); } return *this; } @@ -151,23 +163,10 @@ static void moveAttributes(LLVMContext &Ctx, AttributeList &Attrs, Attrs = AttributeList::get(Ctx, NewAttrs); } -// Convert a ValueTypePair to a TypedPointerType for storing in the PointerTypes -// array. -static TypedPointerType *toTPT(BuiltinCallMutator::ValueTypePair Pair) { - if (!Pair.second) - return nullptr; - unsigned AS = 0; - if (auto *TPT = dyn_cast(Pair.first->getType())) - AS = TPT->getAddressSpace(); - else if (isa(Pair.first->getType())) - AS = Pair.first->getType()->getPointerAddressSpace(); - return TypedPointerType::get(Pair.second, AS); -} - BuiltinCallMutator &BuiltinCallMutator::insertArg(unsigned Index, ValueTypePair Arg) { Args.insert(Args.begin() + Index, Arg.first); - PointerTypes.insert(PointerTypes.begin() + Index, toTPT(Arg)); + PointerTypes.insert(PointerTypes.begin() + Index, Arg.second); moveAttributes(CI->getContext(), Attrs, Index, Args.size() - Index, Index + 1); return *this; @@ -176,7 +175,7 @@ BuiltinCallMutator &BuiltinCallMutator::insertArg(unsigned Index, BuiltinCallMutator &BuiltinCallMutator::replaceArg(unsigned Index, ValueTypePair Arg) { Args[Index] = Arg.first; - PointerTypes[Index] = toTPT(Arg); + PointerTypes[Index] = Arg.second; Attrs = Attrs.removeParamAttributes(CI->getContext(), Index); return *this; } @@ -211,3 +210,71 @@ BuiltinCallMutator BuiltinCallHelper::mutateCallInst(CallInst *CI, std::string FuncName) { return BuiltinCallMutator(CI, std::move(FuncName), Rules, 
NameMapFn); } + +Value *BuiltinCallHelper::addSPIRVCall(IRBuilder<> &Builder, spv::Op Opcode, + Type *ReturnTy, ArrayRef Args, + ArrayRef ArgTys, + const Twine &Name) { + // Sanitize the return type, in case it's a TypedPointerType. + if (auto *TPT = dyn_cast(ReturnTy)) + ReturnTy = PointerType::get(TPT->getElementType(), TPT->getAddressSpace()); + + // Copy the types into the mangling info. + BuiltinFuncMangleInfo BtnInfo; + for (unsigned I = 0; I < ArgTys.size(); I++) { + if (Args[I]->getType()->isPointerTy()) { + assert(cast(Args[I]->getType()) + ->isOpaqueOrPointeeTypeMatches( + cast(ArgTys[I])->getElementType())); + BtnInfo.getTypeMangleInfo(I).PointerTy = ArgTys[I]; + } + } + + // Create the function and the call. + auto *F = getOrCreateFunction(M, ReturnTy, getTypes(Args), + getSPIRVFuncName(Opcode), &BtnInfo); + return Builder.CreateCall(F, Args, ReturnTy->isVoidTy() ? "" : Name); +} + +Type *BuiltinCallHelper::adjustImageType(Type *T, StringRef OldImageKind, + StringRef NewImageKind) { + if (auto *TypedPtrTy = dyn_cast(T)) { + Type *StructTy = TypedPtrTy->getElementType(); + // Adapt opencl.* struct type names to spirv.* struct type names. + if (isOCLImageType(T)) { + auto ImageTypeName = StructTy->getStructName(); + StringRef Acc = kAccessQualName::ReadOnly; + if (hasAccessQualifiedName(ImageTypeName)) + Acc = getAccessQualifierFullName(ImageTypeName); + StructTy = getOrCreateOpaqueStructType( + M, mapOCLTypeNameToSPIRV(ImageTypeName, Acc)); + } + + // Change type name (e.g., spirv.Image -> spirv.SampledImg) if necessary. 
+ StringRef Postfixes; + if (isSPIRVStructType(StructTy, OldImageKind, &Postfixes)) + StructTy = getOrCreateOpaqueStructType( + M, getSPIRVTypeName(NewImageKind, Postfixes)); + else { + report_fatal_error("Type did not have expected image kind"); + } + return TypedPointerType::get(StructTy, TypedPtrTy->getAddressSpace()); + } + report_fatal_error("Expected type to be a SPIRV image type"); +} + +BuiltinCallMutator::ValueTypePair +BuiltinCallHelper::getCallValue(CallInst *CI, unsigned ArgNo) { + Function *CalledFunc = CI->getCalledFunction(); + assert(CalledFunc && "Unexpected indirect call"); + if (CalledFunc != CachedFunc) { + CachedFunc = CalledFunc; + [[maybe_unused]] bool DidDemangle = + getParameterTypes(CalledFunc, CachedParameterTypes, NameMapFn); + assert(DidDemangle && "Expected SPIR-V builtins to be properly mangled"); + } + + Value *ParamValue = CI->getArgOperand(ArgNo); + Type *ParamType = CachedParameterTypes[ArgNo]; + return {ParamValue, ParamType}; +} diff --git a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h index e5658a0f0db89..90774a6594a48 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h +++ b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h @@ -80,7 +80,7 @@ class BuiltinCallMutator { // The arguments for the new call instruction. llvm::SmallVector Args; // The pointer element types for the new call instruction. - llvm::SmallVector PointerTypes; + llvm::SmallVector PointerTypes; // The mangler rules to use for the new call instruction. ManglingRules Rules; @@ -116,11 +116,14 @@ class BuiltinCallMutator { /// Get the corresponding argument for the new call. llvm::Value *getArg(unsigned Index) const { return Args[Index]; } + llvm::Type *getType(unsigned Index) const { return PointerTypes[Index]; } + /// Return the pointer element type of the corresponding index, or nullptr if /// it is not a pointer. 
llvm::Type *getPointerElementType(unsigned Index) const { - llvm::TypedPointerType *ElTy = PointerTypes[Index]; - return ElTy ? ElTy->getElementType() : nullptr; + if (auto *TPT = llvm::dyn_cast(PointerTypes[Index])) + return TPT->getElementType(); + return nullptr; } /// A pair representing both the LLVM value of an argument and its @@ -128,7 +131,7 @@ class BuiltinCallMutator { /// implicit conversion from an LLVM value object (but only if it is not of /// pointer type), or by the appropriate std::pair type. struct ValueTypePair : public std::pair { - ValueTypePair(llvm::Value *V) : pair(V, nullptr) { + ValueTypePair(llvm::Value *V) : pair(V, V->getType()) { assert(!V->getType()->isPointerTy() && "Must specify a pointer element type if value is a pointer."); } @@ -181,7 +184,7 @@ class BuiltinCallMutator { BuiltinCallMutator &moveArg(unsigned FromIndex, unsigned ToIndex) { if (FromIndex == ToIndex) return *this; - ValueTypePair Pair(Args[FromIndex], getPointerElementType(FromIndex)); + ValueTypePair Pair(Args[FromIndex], getType(FromIndex)); removeArg(FromIndex); insertArg(ToIndex, Pair); return *this; @@ -200,15 +203,15 @@ class BuiltinCallMutator { /// When present, the IRBuilder parameter corresponds to a builder that is set /// to insert immediately before the new call instruction. The Value parameter /// corresponds to the argument to be mutated. The Type parameter, when - /// present, corresponds to the pointer element type of the argument, or null - /// when it is not present. + /// present, will be either a TypedPointerType representing the "true" type of + /// the value, or the argument's type otherwise. 
template BuiltinCallMutator &mapArg(unsigned Index, FnType Func) { using namespace llvm; using std::is_invocable; IRBuilder<> Builder(CI); Value *V = Args[Index]; - [[maybe_unused]] Type *T = getPointerElementType(Index); + [[maybe_unused]] Type *T = getType(Index); // Dispatch the function call as appropriate, based on the types that the // function may be called with. @@ -272,6 +275,52 @@ class BuiltinCallHelper { /// to the given SPIR-V opcode (whose name is used in the lookup map of /// getSPIRVFuncName). BuiltinCallMutator mutateCallInst(llvm::CallInst *CI, spv::Op Opcode); + + /// Create a call to a SPIR-V builtin function (specified via opcode). + /// The return type and argument types may be TypedPointerType, if the actual + /// LLVM type is a pointer type. + llvm::Value *addSPIRVCall(llvm::IRBuilder<> &Builder, spv::Op Opcode, + llvm::Type *ReturnTy, + llvm::ArrayRef Args, + llvm::ArrayRef ArgTys, + const llvm::Twine &Name = ""); + + /// Create a call to a SPIR-V builtin function, returning a value and type + /// pair suitable for use in BuiltinCallMutator::replaceArg and similar + /// functions. + BuiltinCallMutator::ValueTypePair + addSPIRVCallPair(llvm::IRBuilder<> &Builder, spv::Op Opcode, + llvm::Type *ReturnTy, llvm::ArrayRef Args, + llvm::ArrayRef ArgTys, + const llvm::Twine &Name = "") { + llvm::Value *V = + addSPIRVCall(Builder, Opcode, ReturnTy, Args, ArgTys, Name); + return BuiltinCallMutator::ValueTypePair(V, ReturnTy); + } + + /// Adapt the various SPIR-V image types, for example changing a "spirv.Image" + /// type into a "spirv.SampledImage" type with identical parameters. + /// + /// The input type is expected to be a TypedPointerType to either a + /// "spirv.*" or "opencl.*" struct type. In the case of "opencl.*" struct + /// types, it will first convert it into the corresponding "spirv.Image" + /// struct type. + /// + /// If the image type does not match OldImageKind, this method will abort. 
+ llvm::Type *adjustImageType(llvm::Type *T, llvm::StringRef OldImageKind, + llvm::StringRef NewImageKind); + +private: + llvm::SmallVector CachedParameterTypes; + llvm::Function *CachedFunc = nullptr; + +public: + BuiltinCallMutator::ValueTypePair getCallValue(llvm::CallInst *CI, + unsigned ArgNo); + + llvm::Type *getCallValueType(llvm::CallInst *CI, unsigned ArgNo) { + return getCallValue(CI, ArgNo).second; + } }; } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h index 60500f399b77c..a97c8745ddade 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVInternal.h +++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h @@ -612,12 +612,9 @@ Scope getArgAsScope(CallInst *CI, unsigned I); /// \param I argument index. Decoration getArgAsDecoration(CallInst *CI, unsigned I); -/// Check if a type is SPIRV sampler type. -bool isSPIRVSamplerType(llvm::Type *Ty); - -/// Check if a type is OCL image type (if pointed to). +/// Check if a type is OCL image type. /// \return type name without "opencl." prefix. -bool isOCLImageStructType(llvm::Type *Ty, StringRef *Name = nullptr); +bool isOCLImageType(llvm::Type *Ty, StringRef *Name = nullptr); /// \param BaseTyName is the type name as in spirv.BaseTyName.Postfixes /// \param Postfix contains postfixes extracted from the SPIR-V image @@ -861,12 +858,6 @@ std::string getSPIRVTypeName(StringRef BaseTyName, StringRef Postfixes = ""); /// Checks if given type name is either ConstantSampler or ConsantPipeStorage. bool isSPIRVConstantName(StringRef TyName); -/// Get SPIR-V type by changing the type name from spirv.OldName.Postfixes -/// to spirv.NewName.Postfixes. -Type *getSPIRVStructTypeByChangeBaseTypeName(Module *M, Type *T, - StringRef OldName, - StringRef NewName); - /// Get the postfixes of SPIR-V image type name as in spirv.Image.postfixes. 
std::string getSPIRVImageTypePostfixes(StringRef SampledType, SPIRVTypeImageDescriptor Desc, @@ -876,10 +867,6 @@ std::string getSPIRVImageTypePostfixes(StringRef SampledType, /// friendly LLVM IR. std::string getSPIRVImageSampledTypeName(SPIRVType *Ty); -/// Translates OpenCL image type names to SPIR-V. -/// E.g. %opencl.image1d_rw_t -> %spirv.Image._void_0_0_0_0_0_0_2 -Type *adaptSPIRVImageType(Module *M, Type *PointeeType); - /// Get LLVM type for sampled type of SPIR-V image type by postfix. Type *getLLVMTypeForSPIRVImageSampledTypePostfix(StringRef Postfix, LLVMContext &Ctx); @@ -888,6 +875,9 @@ Type *getLLVMTypeForSPIRVImageSampledTypePostfix(StringRef Postfix, /// E.g. opencl.image2d_ro_t.3 -> image2d_t std::string getImageBaseTypeName(StringRef Name); +/// Extract the image type descriptor from the given image type. +SPIRVTypeImageDescriptor getImageDescriptor(Type *Ty); + /// Map OpenCL opaque type name to SPIR-V type name. std::string mapOCLTypeNameToSPIRV(StringRef Name, StringRef Acc = ""); @@ -941,18 +931,16 @@ bool containsUnsignedAtomicType(StringRef Name); std::string mangleBuiltin(StringRef UniqName, ArrayRef ArgTypes, BuiltinFuncMangleInfo *BtnInfo); -/// Extract the pointee types of arguments from a mangled function name. If the -/// corresponding type is not a pointer to a struct type, its value will be a -/// nullptr instead. -void getParameterTypes( +/// Extract the true pointer types, expressed as a TypedPointerType, of +/// arguments from a mangled function name. If the corresponding type is not a +/// pointer type, its value will be the argument's actual type instead. Returns +/// true if the function name was successfully demangled. 
+bool getParameterTypes( Function *F, SmallVectorImpl &ArgTys, std::function StructNameMapFn = nullptr); -inline void getParameterTypes(CallInst *CI, SmallVectorImpl &ArgTys) { +inline bool getParameterTypes(CallInst *CI, SmallVectorImpl &ArgTys) { return getParameterTypes(CI->getCalledFunction(), ArgTys); } -void getParameterTypes( - Function *F, SmallVectorImpl &ArgTys, - std::function StructNameMapFn = nullptr); /// Mangle a function from OpenCL extended instruction set in SPIR-V friendly IR /// manner @@ -1013,6 +1001,10 @@ bool hasLoopMetadata(const Module *M); // If so, return it's extended opcode in ExtOp. bool isSPIRVOCLExtInst(const CallInst *CI, OCLExtOpKind *ExtOp); +/// Returns true if a function name corresponds to an OpenCL builtin that is not +/// expected to have name mangling. +bool isNonMangledOCLBuiltin(StringRef Name); + // check LLVM Intrinsics type(s) for validity bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM); diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp index ac8eac6fd5cd0..c677ef9fb17be 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp @@ -256,33 +256,14 @@ void SPIRVToOCLBase::visitCastInst(CastInst &Cast) { void SPIRVToOCLBase::visitCallSPIRVImageQuerySize(CallInst *CI) { // Get image type - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - StructType *ImgTy = cast(ParamTys[0]); - assert(ImgTy && ImgTy->isOpaque() && - "image type must be an opaque structure"); - StringRef ImgTyName = ImgTy->getName(); - assert(ImgTyName.startswith("opencl.image") && "not an OCL image type"); - - unsigned ImgDim = 0; - bool ImgArray = false; - - if (ImgTyName.startswith("opencl.image1d")) { - ImgDim = 1; - } else if (ImgTyName.startswith("opencl.image2d")) { - ImgDim = 2; - } else if (ImgTyName.startswith("opencl.image3d")) { - ImgDim = 3; - } - assert(ImgDim != 0 && "unexpected image dimensionality"); - - if 
(ImgTyName.count("_array_") != 0) { - ImgArray = true; - } + Type *ImgTy = getCallValueType(CI, 0); + auto Desc = getImageDescriptor(ImgTy); + unsigned ImgDim = getImageDimension(Desc.Dim); + bool ImgArray = Desc.Arrayed; AttributeList Attributes = CI->getCalledFunction()->getAttributes(); BuiltinFuncMangleInfo Mangle; - Mangle.getTypeMangleInfo(0).PointerTy = TypedPointerType::get(ImgTy, 0); + Mangle.getTypeMangleInfo(0).PointerTy = ImgTy; Type *Int32Ty = Type::getInt32Ty(*Ctx); Instruction *GetImageSize = nullptr; @@ -590,7 +571,8 @@ void SPIRVToOCLBase::visitCallSPIRVPipeBuiltin(CallInst *CI, Op OC) { if (T != NewTy) { P = Builder.CreatePointerBitCastOrAddrSpaceCast(P, NewTy); } - return std::pair(P, Builder.getInt8Ty()); + return std::make_pair( + P, TypedPointerType::get(Builder.getInt8Ty(), SPIRAS_Generic)); }); } } @@ -758,31 +740,25 @@ void SPIRVToOCLBase::visitCallSPIRVImageSampleExplicitLodBuiltIn(CallInst *CI, T = VT->getElementType(); auto Mutator = mutateCallImageOperands(CI, kOCLBuiltinName::SampledReadImage, T, 2); + + CallInst *CallSampledImg = cast(CI->getArgOperand(0)); + auto Img = getCallValue(CallSampledImg, 0); + auto Sampler = getCallValue(CallSampledImg, 1); bool IsDepthImage = false; - Value *Sampler = nullptr; - Type *SamplerTy = nullptr; Mutator.mapArg(0, [&](Value *SampledImg) { - CallInst *CallSampledImg = cast(SampledImg); - SmallVector SampledArgTys; - getParameterTypes(CallSampledImg, SampledArgTys); - Type *ImgTy = SampledArgTys[0]; - SamplerTy = SampledArgTys[1]; - StringRef ImageTypeName; - if (isOCLImageStructType(ImgTy, &ImageTypeName)) + if (isOCLImageType(Img.second, &ImageTypeName)) IsDepthImage = ImageTypeName.contains("_depth_"); - auto Img = CallSampledImg->getArgOperand(0); - Sampler = CallSampledImg->getArgOperand(1); if (CallSampledImg->hasOneUse()) { CallSampledImg->replaceAllUsesWith( UndefValue::get(CallSampledImg->getType())); CallSampledImg->dropAllReferences(); CallSampledImg->eraseFromParent(); } - return 
std::make_pair(Img, ImgTy); + return Img; }); - Mutator.insertArg(1, {Sampler, SamplerTy}); + Mutator.insertArg(1, Sampler); if (IsDepthImage) Mutator.changeReturnType(T, [&](IRBuilder<> &Builder, CallInst *NewCI) { return Builder.CreateInsertElement( @@ -878,14 +854,12 @@ void SPIRVToOCLBase::visitCallSPIRVAvcINTELEvaluateBuiltIn(CallInst *CI, mutateCallInst(CI, OCLSPIRVSubgroupAVCIntelBuiltinMap::rmap(OC)); if (NumImages) { CallInst *SrcImage = cast(Mutator.getArg(0)); - SmallVector SrcImageTys; - getParameterTypes(SrcImage, SrcImageTys); if (NumImages == 1) { // Multi reference opcode - remove src image OpVmeImageINTEL opcode // and replace it with corresponding OpImage and OpSampler arguments size_t SamplerPos = Mutator.arg_size() - 1; - Mutator.replaceArg(0, {SrcImage->getOperand(0), SrcImageTys[0]}); - Mutator.insertArg(SamplerPos, {SrcImage->getOperand(1), SrcImageTys[1]}); + Mutator.replaceArg(0, getCallValue(SrcImage, 0)); + Mutator.insertArg(SamplerPos, getCallValue(SrcImage, 1)); } else { CallInst *FwdRefImage = cast(Mutator.getArg(1)); CallInst *BwdRefImage = @@ -895,17 +869,15 @@ void SPIRVToOCLBase::visitCallSPIRVAvcINTELEvaluateBuiltIn(CallInst *CI, // opcodes and OpSampler Mutator.removeArgs(0, NumImages); // insert source OpImage and OpSampler - Mutator.insertArg(0, {SrcImage->getOperand(0), SrcImageTys[0]}); - Mutator.insertArg(1, {SrcImage->getOperand(1), SrcImageTys[1]}); + Mutator.insertArg(0, getCallValue(SrcImage, 0)); + Mutator.insertArg(1, getCallValue(SrcImage, 1)); // insert reference OpImage - getParameterTypes(FwdRefImage, SrcImageTys); - Mutator.insertArg(1, {FwdRefImage->getOperand(0), SrcImageTys[0]}); + Mutator.insertArg(1, getCallValue(FwdRefImage, 0)); EraseVmeImageCall(SrcImage); EraseVmeImageCall(FwdRefImage); if (BwdRefImage) { // Dual reference opcode - insert second reference OpImage argument - getParameterTypes(BwdRefImage, SrcImageTys); - Mutator.insertArg(2, {BwdRefImage->getOperand(0), SrcImageTys[0]}); + 
Mutator.insertArg(2, getCallValue(BwdRefImage, 0)); EraseVmeImageCall(BwdRefImage); } } @@ -1129,11 +1101,6 @@ std::string SPIRVToOCLBase::translateOpaqueType(StringRef STName) { return OCLOpaqueName; } -void SPIRVToOCLBase::getParameterTypes(CallInst *CI, - SmallVectorImpl &Tys) { - ::getParameterTypes(CI->getCalledFunction(), Tys, translateOpaqueType); -} - void addSPIRVBIsLoweringPass(ModulePassManager &PassMgr, SPIRV::BIsRepresentation BIsRep) { switch (BIsRep) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h index 204cd72e7757e..1b92fc96bd140 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h @@ -280,8 +280,6 @@ class SPIRVToOCLBase : public InstVisitor, static std::string getOCLPipeOpaqueType(SmallVector &Postfixes); - void getParameterTypes(CallInst *CI, SmallVectorImpl &Tys); - static std::string translateOpaqueType(StringRef STName); /// Mutate the call instruction based on (optional) image operands at position diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp index bce8ed588d079..ee645731d69dc 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp @@ -174,17 +174,17 @@ CallInst *SPIRVToOCL20Base::mutateCommonAtomicArguments(CallInst *CI, Op OC) { auto OrderIdx = Ptr + 2; auto Mutator = mutateCallInst(CI, Name); - Mutator.mapArgs([=](Value *PtrArg, Type *PtrElemTy) { - Type *PtrArgTy = PtrArg->getType(); - if (PtrArgTy->isPointerTy()) { - if (PtrArgTy->getPointerAddressSpace() != SPIRAS_Generic) { - Type *FixedPtr = PointerType::getWithSamePointeeType( - cast(PtrArgTy), SPIRAS_Generic); - PtrArg = CastInst::CreatePointerBitCastOrAddrSpaceCast( - PtrArg, FixedPtr, PtrArg->getName() + ".as", CI); + Mutator.mapArgs([=](IRBuilder<> &Builder, Value *PtrArg, Type *PtrArgTy) { + if (auto *TypedPtrTy = dyn_cast(PtrArgTy)) { + if (TypedPtrTy->getAddressSpace() != SPIRAS_Generic) { + Type *ElementTy = 
TypedPtrTy->getElementType(); + Type *FixedPtr = PointerType::get(ElementTy, SPIRAS_Generic); + PtrArg = Builder.CreateAddrSpaceCast(PtrArg, FixedPtr, + PtrArg->getName() + ".as"); + PtrArgTy = TypedPointerType::get(ElementTy, SPIRAS_Generic); } } - return std::make_pair(PtrArg, PtrElemTy); + return std::make_pair(PtrArg, PtrArgTy); }); Mutator.mapArg(ScopeIdx, [=](Value *Arg) { return SPIRV::transSPIRVMemoryScopeIntoOCLMemoryScope(Arg, CI); @@ -224,7 +224,7 @@ void SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) { cast(PExpected->getType()), AddrSpc); Value *V = Builder.CreateAddrSpaceCast( PExpected, PtrTyAS, PExpected->getName() + ".as"); - return std::make_pair(V, MemTy); + return std::make_pair(V, TypedPointerType::get(MemTy, AddrSpc)); }) .moveArg(4, 2) .changeReturnType(Type::getInt1Ty(*Ctx), [=](IRBuilder<> &Builder, @@ -264,7 +264,8 @@ void SPIRVToOCL20Base::visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) { Mutator.mapArg(6, [=](IRBuilder<> &Builder, Value *Invoke) { Value *Replace = CastInst::CreatePointerBitCastOrAddrSpaceCast( Invoke, Builder.getInt8PtrTy(SPIRAS_Generic), "", CI); - return std::pair(Replace, Builder.getInt8Ty()); + return std::make_pair( + Replace, TypedPointerType::get(Builder.getInt8Ty(), SPIRAS_Generic)); }); if (!HasVaargs) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp b/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp index ab16374b027d3..c61ecdc22d941 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp @@ -229,13 +229,13 @@ void SPIRVTypeScavenger::deduceFunctionType(Function &F) { // If the function is a mangled name, try to recover types from the Itanium // name mangling. 
if (F.getName().startswith("_Z")) { - SmallVector ParameterTypes; - getParameterTypes(&F, ParameterTypes); + SmallVector ParamTypes; + getParameterTypes(&F, ParamTypes); for (Argument *Arg : PointerArgs) { - if (auto *Ty = ParameterTypes[Arg->getArgNo()]) { - DeducedTypes[Arg] = Ty; + if (auto *Ty = dyn_cast(ParamTypes[Arg->getArgNo()])) { + DeducedTypes[Arg] = Ty->getElementType(); LLVM_DEBUG(dbgs() << "Arg " << Arg->getArgNo() << " of " << F.getName() - << " has type " << *Ty << "\n"); + << " has type " << *Ty->getElementType() << "\n"); } } } diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index b5351ceead32f..fb5be8af701b7 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -250,19 +250,19 @@ void getFunctionTypeParameterTypes(llvm::FunctionType *FT, bool isVoidFuncTy(FunctionType *FT) { return FT->getReturnType()->isVoidTy(); } -bool isOCLImageStructType(llvm::Type *Ty, StringRef *Name) { - if (auto *ST = dyn_cast_or_null(Ty)) - if (ST->isOpaque()) { - auto FullName = ST->getName(); - if (FullName.find(kSPR2TypeName::ImagePrefix) == 0) { - if (Name) - *Name = FullName.drop_front(strlen(kSPR2TypeName::OCLPrefix)); - return true; +bool isOCLImageType(llvm::Type *Ty, StringRef *Name) { + if (auto *TPT = dyn_cast_or_null(Ty)) + if (auto *ST = dyn_cast_or_null(TPT->getElementType())) + if (ST->isOpaque()) { + auto FullName = ST->getName(); + if (FullName.find(kSPR2TypeName::ImagePrefix) == 0) { + if (Name) + *Name = FullName.drop_front(strlen(kSPR2TypeName::OCLPrefix)); + return true; + } } - } return false; } - /// \param BaseTyName is the type Name as in spirv.BaseTyName.Postfixes /// \param Postfix contains postfixes extracted from the SPIR-V image /// type Name as spirv.BaseTyName.Postfixes. 
@@ -722,15 +722,6 @@ static StringRef stringify(const itanium_demangle::NameType *Node) { return StringRef(Str.begin(), Str.size()); } -void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, - std::function NameMapFn) { - SmallVector PIPs; - getParameterTypes(F, PIPs, std::move(NameMapFn)); - for (auto *Pair : PIPs) { - ArgTys.push_back(Pair ? Pair->getElementType() : nullptr); - } -} - template static TypedPointerType * parseNode(Module *M, const llvm::itanium_demangle::Node *ParamType, @@ -838,14 +829,14 @@ parseNode(Module *M, const llvm::itanium_demangle::Node *ParamType, return PointeeTy ? TypedPointerType::get(PointeeTy, AS) : nullptr; } -void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, +bool getParameterTypes(Function *F, SmallVectorImpl &ArgTys, std::function NameMapFn) { using namespace llvm::itanium_demangle; // If there's no mangled name, we can't do anything. Also, if there's no // parameters, do nothing. StringRef Name = F->getName(); if (!Name.startswith("_Z") || F->arg_empty()) - return; + return Name.startswith("_Z"); Module *M = F->getParent(); auto GetStructType = [&](StringRef Name) { @@ -860,7 +851,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, bool HasSret = false; for (Argument &Arg : F->args()) { if (!Arg.getType()->isPointerTy()) - ArgTys.push_back(nullptr); + ArgTys.push_back(Arg.getType()); else if (Type *Ty = Arg.getParamStructRetType()) { assert(!HasSret && &Arg == F->getArg(0) && "sret parameter should only appear on the first argument"); @@ -871,7 +862,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, else ArgTys.push_back(TypedPointerType::get(Ty, 0)); } else { - ArgTys.push_back(nullptr); + ArgTys.push_back(Arg.getType()); } } @@ -894,7 +885,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, // name encoding, bail out. auto *RootNode = dyn_cast_or_null(Demangler.parse()); if (!RootNode) - return; + return false; // Get the parameter list. 
If the function is a vararg function, drop the last // parameter. @@ -912,7 +903,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, } else { LLVM_DEBUG(dbgs() << "[getParameterTypes] function " << MangledName << " was expected to have a varargs parameter\n"); - return; + return false; } } @@ -923,21 +914,26 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, << " appears to have " << Params.size() << " arguments but has " << (ArgTys.end() - ArgIter) << "\n"); - return; + return false; } + // Overwrite the types of pointer-typed arguments with information from + // demangling. + bool DemangledSuccessfully = true; for (auto *ParamType : Params) { - Type *ArgTy = F->getArg(ArgIter - ArgTys.begin())->getType(); - TypedPointerType *PointeeTy = parseNode(M, ParamType, GetStructType); - if (ArgTy->isPointerTy() && PointeeTy == nullptr) { - PointeeTy = TypedPointerType::get(Type::getInt8Ty(ArgTy->getContext()), - ArgTy->getPointerAddressSpace()); - LLVM_DEBUG(dbgs() << "Failed to recover type of argument " - << (ArgIter - ArgTys.begin()) << " of function " - << F->getName() << "\n"); - } - *ArgIter++ = PointeeTy; + Type *ArgTy = *ArgIter; + Type *DemangledTy = parseNode(M, ParamType, GetStructType); + if (ArgTy->isPointerTy() && DemangledTy == nullptr) { + DemangledTy = TypedPointerType::get(Type::getInt8Ty(ArgTy->getContext()), + ArgTy->getPointerAddressSpace()); + LLVM_DEBUG(dbgs() << "Failed to recover type of argument " << *ArgTy + << " of function " << F->getName() << "\n"); + DemangledSuccessfully = false; + } else if (!DemangledTy) + DemangledTy = ArgTy; + *ArgIter++ = DemangledTy; } + return DemangledSuccessfully; } CallInst *mutateCallInst( @@ -1553,17 +1549,6 @@ bool isSPIRVConstantName(StringRef TyName) { return false; } -Type *getSPIRVStructTypeByChangeBaseTypeName(Module *M, Type *T, - StringRef OldName, - StringRef NewName) { - StringRef Postfixes; - if (isSPIRVStructType(T, OldName, &Postfixes)) - return 
getOrCreateOpaqueStructType(M, getSPIRVTypeName(NewName, Postfixes)); - LLVM_DEBUG(dbgs() << " Invalid SPIR-V type " << *T << '\n'); - llvm_unreachable("Invalid SPIR-V type"); - return nullptr; -} - std::string getSPIRVImageTypePostfixes(StringRef SampledType, SPIRVTypeImageDescriptor Desc, SPIRVAccessQualifierKind Acc) { @@ -1665,6 +1650,13 @@ std::string mapOCLTypeNameToSPIRV(StringRef Name, StringRef Acc) { return getSPIRVTypeName(BaseTy, OS.str()); } +SPIRVTypeImageDescriptor getImageDescriptor(Type *Ty) { + StringRef TyName; + [[maybe_unused]] bool IsImg = isOCLImageType(Ty, &TyName); + assert(IsImg && "Must be an image type"); + return map(getImageBaseTypeName(TyName)); +} + bool eraseIfNoUse(Function *F) { bool Changed = false; if (!F) @@ -1825,19 +1817,6 @@ StringRef getAccessQualifierFullName(StringRef TyName) { .Case(kAccessQualPostfix::ReadWrite, kAccessQualName::ReadWrite); } -/// Translates OpenCL image type names to SPIR-V. -Type *adaptSPIRVImageType(Module *M, Type *PointeeType) { - if (isOCLImageStructType(PointeeType)) { - auto ImageTypeName = PointeeType->getStructName(); - StringRef Acc = kAccessQualName::ReadOnly; - if (hasAccessQualifiedName(ImageTypeName)) - Acc = getAccessQualifierFullName(ImageTypeName); - return getOrCreateOpaqueStructType( - M, mapOCLTypeNameToSPIRV(ImageTypeName, Acc)); - } - return PointeeType; -} - llvm::PointerType *getOCLClkEventType(Module *M) { return getOrCreateOpaquePtrType(M, SPIR_TYPE_NAME_CLK_EVENT_T, SPIRAS_Private); diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 56ece5aad8c9f..3dacdf8e4f238 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -162,7 +162,8 @@ static void translateSEVDecoration(Attribute Sev, SPIRVValue *Val) { } LLVMToSPIRVBase::LLVMToSPIRVBase(SPIRVModule *SMod) - : M(nullptr), Ctx(nullptr), BM(SMod), SrcLang(0), SrcLangVer(0) { + : BuiltinCallHelper(ManglingRules::None), M(nullptr), Ctx(nullptr), + 
BM(SMod), SrcLang(0), SrcLangVer(0) { DbgTran = std::make_unique(nullptr, SMod, this); } @@ -173,6 +174,7 @@ LLVMToSPIRVBase::~LLVMToSPIRVBase() { bool LLVMToSPIRVBase::runLLVMToSPIRV(Module &Mod) { M = &Mod; + initialize(Mod); CG = std::make_unique(Mod); Ctx = &M->getContext(); DbgTran->setModule(M); @@ -533,8 +535,10 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(Type *ET, unsigned AddrSpc) { } if (STName.startswith(kSPR2TypeName::ImagePrefix)) { assert(AddrSpc == SPIRAS_Global); - Type *ImageTy = adaptSPIRVImageType(M, ST); - return SaveType(transPointerType(ImageTy, SPIRAS_Global)); + Type *ImageTy = + adjustImageType(TypedPointerType::get(ST, AddrSpc), + kSPIRVTypeName::Image, kSPIRVTypeName::Image); + return SaveType(transType(ImageTy)); } if (STName == kSPR2TypeName::Sampler) return SaveType(transSPIRVOpaqueType( @@ -702,19 +706,17 @@ SPIRVType *LLVMToSPIRVBase::transSPIRVOpaqueType(StringRef STName, return SaveType(BM->addImageType( SampledT, Desc, static_cast(Ops[6]))); } else if (TN == kSPIRVTypeName::SampledImg) { - return SaveType( - BM->addSampledImageType(static_cast(transPointerType( - getSPIRVStructTypeByChangeBaseTypeName( - M, ST, kSPIRVTypeName::SampledImg, kSPIRVTypeName::Image), - SPIRAS_Global)))); + return SaveType(BM->addSampledImageType(static_cast( + transType(adjustImageType(TypedPointerType::get(ST, SPIRAS_Global), + kSPIRVTypeName::SampledImg, + kSPIRVTypeName::Image))))); } else if (TN == kSPIRVTypeName::VmeImageINTEL) { // This type is the same as SampledImageType, but consumed by Subgroup AVC // Intel extension instructions. 
- return SaveType( - BM->addVmeImageINTELType(static_cast(transPointerType( - getSPIRVStructTypeByChangeBaseTypeName( - M, ST, kSPIRVTypeName::VmeImageINTEL, kSPIRVTypeName::Image), - SPIRAS_Global)))); + return SaveType(BM->addVmeImageINTELType(static_cast( + transType(adjustImageType(TypedPointerType::get(ST, SPIRAS_Global), + kSPIRVTypeName::VmeImageINTEL, + kSPIRVTypeName::Image))))); } else if (TN == kSPIRVTypeName::Sampler) return SaveType(BM->addSamplerType()); else if (TN == kSPIRVTypeName::DeviceEvent) @@ -739,22 +741,15 @@ SPIRVType *LLVMToSPIRVBase::transScavengedType(Value *V) { SPIRVType *RT = transType(F->getReturnType()); std::vector PT; for (Argument &Arg : F->args()) { - auto TypePair = - OCLTypeToSPIRVPtr->getAdaptedArgumentType(F, Arg.getArgNo()); - Type *Ty = TypePair.first; - Type *PointeeTy = TypePair.second; + Type *Ty = OCLTypeToSPIRVPtr->getAdaptedArgumentType(F, Arg.getArgNo()); if (!Ty) { Ty = Arg.getType(); if (Ty->isPointerTy()) - PointeeTy = - Scavenger->getArgumentPointerElementType(F, Arg.getArgNo()); + Ty = TypedPointerType::get( + Scavenger->getArgumentPointerElementType(F, Arg.getArgNo()), + Ty->getPointerAddressSpace()); } - SPIRVType *TransTy = nullptr; - if (Ty->isPointerTy()) - TransTy = transPointerType(PointeeTy, Ty->getPointerAddressSpace()); - else - TransTy = transType(Ty); - PT.push_back(TransTy); + PT.push_back(transType(Ty)); } return getSPIRVFunctionType(RT, PT); @@ -5059,15 +5054,13 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI, // for this call, because there is no support for type corresponding to // OpTypeSampledImage. So, in this case, we create the required type here. 
Value *Image = CI->getArgOperand(0); - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - Type *ImageTy = adaptSPIRVImageType(M, ParamTys[0]); - Type *SampledImgTy = getSPIRVStructTypeByChangeBaseTypeName( - M, ImageTy, kSPIRVTypeName::Image, kSPIRVTypeName::SampledImg); + Type *SampledImgTy = + adjustImageType(getCallValueType(CI, 0), kSPIRVTypeName::Image, + kSPIRVTypeName::SampledImg); Value *Sampler = CI->getArgOperand(1); - return BM->addSampledImageInst( - transPointerType(SampledImgTy, SPIRAS_Global), transValue(Image, BB), - transValue(Sampler, BB), BB); + return BM->addSampledImageInst(transType(SampledImgTy), + transValue(Image, BB), + transValue(Sampler, BB), BB); } case OpFixedSqrtINTEL: case OpFixedRecipINTEL: diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.h b/llvm-spirv/lib/SPIRV/SPIRVWriter.h index 0e5cd5ae6f8b7..3c2669a847e97 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.h +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.h @@ -46,6 +46,7 @@ #include "OCLTypeToSPIRV.h" #include "OCLUtil.h" #include "SPIRVBasicBlock.h" +#include "SPIRVBuiltinHelper.h" #include "SPIRVEntry.h" #include "SPIRVEnum.h" #include "SPIRVFunction.h" @@ -68,7 +69,7 @@ using namespace OCLUtil; namespace SPIRV { -class LLVMToSPIRVBase { +class LLVMToSPIRVBase : protected BuiltinCallHelper { public: LLVMToSPIRVBase(SPIRVModule *SMod); bool runLLVMToSPIRV(Module &Mod); From 634156af689335aa662576a78ead69edc4124e0b Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Tue, 8 Nov 2022 07:18:05 -0500 Subject: [PATCH 511/516] Fix test failures during pulldown by adding -opaque-pointers flag --- clang/test/CodeGen/PR44896.ll | 6 ++-- clang/test/CodeGen/thinlto_backend.ll | 32 +++++++++---------- .../thinlto_backend_local_name_conflict.ll | 4 +-- .../thinlto-distributed-type-metadata.cpp | 18 +++++------ .../LTO/Resolution/X86/comdat-mixed-lto.ll | 12 +++---- llvm/test/ThinLTO/X86/constructor-alias.ll | 6 ++-- 6 files changed, 39 insertions(+), 39 deletions(-) diff --git 
a/clang/test/CodeGen/PR44896.ll b/clang/test/CodeGen/PR44896.ll index b155bfcb8293d..ae756aa37af9c 100644 --- a/clang/test/CodeGen/PR44896.ll +++ b/clang/test/CodeGen/PR44896.ll @@ -1,6 +1,6 @@ -; RUN: %clang -fdiscard-value-names -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=WARNING %s -; RUN: %clang -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=NOWARNING %s -; RUN: %clang_cc1 -S -emit-llvm %s -discard-value-names -o /dev/null +; RUN: %clang -Xclang -opaque-pointers -fdiscard-value-names -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=WARNING %s +; RUN: %clang -Xclang -opaque-pointers -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=NOWARNING %s +; RUN: %clang_cc1 -opaque-pointers -S -emit-llvm %s -discard-value-names -o /dev/null ; PR 44896 ; WARNING: ignoring -fdiscard-value-names for LLVM Bitcode diff --git a/clang/test/CodeGen/thinlto_backend.ll b/clang/test/CodeGen/thinlto_backend.ll index dea1a8ac54cd3..37ab6206a9ce5 100644 --- a/clang/test/CodeGen/thinlto_backend.ll +++ b/clang/test/CodeGen/thinlto_backend.ll @@ -1,52 +1,52 @@ ; REQUIRES: x86-registered-target -; RUN: opt -module-summary -o %t1.o %s -; RUN: opt -module-summary -o %t2.o %S/Inputs/thinlto_backend.ll -; RUN: llvm-lto -thinlto -o %t %t1.o %t2.o +; RUN: opt -opaque-pointers -module-summary -o %t1.o %s +; RUN: opt -opaque-pointers -module-summary -o %t2.o %S/Inputs/thinlto_backend.ll +; RUN: llvm-lto -opaque-pointers -thinlto -o %t %t1.o %t2.o ; Ensure clang -cc1 give expected error for incorrect input type -; RUN: not %clang_cc1 -O2 -o %t1.o -x c %s -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-WARNING +; RUN: not %clang_cc1 -opaque-pointers -O2 -o %t1.o -x c %s -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-WARNING ; CHECK-WARNING: error: invalid argument '-fthinlto-index={{.*}}' only allowed with '-x ir' ; Ensure we get expected error for missing index file -; RUN: %clang -O2 -o %t4.o -x ir %t1.o -c 
-fthinlto-index=bad.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR1 +; RUN: %clang -Xclang -opaque-pointers -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=bad.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR1 ; CHECK-ERROR1: Error loading index file 'bad.thinlto.bc' ; Ensure we ignore empty index file, and run non-ThinLTO compilation which ; would not import f2 ; RUN: touch %t4.thinlto.bc -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=%t4.thinlto.bc +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=%t4.thinlto.bc ; RUN: llvm-nm %t4.o | FileCheck --check-prefix=CHECK-OBJ-IGNORE-EMPTY %s ; CHECK-OBJ-IGNORE-EMPTY: T f1 ; CHECK-OBJ-IGNORE-EMPTY: U f2 ; Ensure we don't fail with index and non-ThinLTO object file, and output must ; be empty file. -; RUN: opt -o %t5.o %s -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t5.o -c -fthinlto-index=%t.thinlto.bc +; RUN: opt -opaque-pointers -o %t5.o %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t5.o -c -fthinlto-index=%t.thinlto.bc ; RUN: llvm-nm %t4.o 2>&1 | count 0 ; Ensure f2 was imported. Check for all 3 flavors of -save-temps[=cwd|obj]. 
-; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj -; RUN: llvm-dis %t1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj +; RUN: llvm-dis -opaque-pointers %t1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s ; RUN: mkdir -p %T/dir1 ; RUN: cd %T/dir1 -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=cwd +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=cwd ; RUN: cd ../.. -; RUN: llvm-dis %T/dir1/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s +; RUN: llvm-dis -opaque-pointers %T/dir1/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s ; RUN: mkdir -p %T/dir2 ; RUN: cd %T/dir2 -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps ; RUN: cd ../.. 
-; RUN: llvm-dis %T/dir2/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s +; RUN: llvm-dis -opaque-pointers %T/dir2/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s ; CHECK-IMPORT: define available_externally void @f2() ; RUN: llvm-nm %t3.o | FileCheck --check-prefix=CHECK-OBJ %s ; CHECK-OBJ: T f1 ; CHECK-OBJ-NOT: U f2 ; Ensure we get expected error for input files without summaries -; RUN: opt -o %t2.o %s -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR2 +; RUN: opt -opaque-pointers -o %t2.o %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR2 ; CHECK-ERROR2: Error loading imported file {{.*}}: Could not find module summary target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll b/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll index 8a8abd53c4e18..c74271004ade9 100644 --- a/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll +++ b/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll @@ -14,8 +14,8 @@ ; This module will import a() and b() which should cause the read only copy ; of baz from each of those modules to be imported. Check that the both are ; imported as local copies. 
-; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t.bc -c -fthinlto-index=%t.bc.thinlto.bc -save-temps=obj -; RUN: llvm-dis %t.s.3.import.bc -o - | FileCheck --check-prefix=IMPORT %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t.bc -c -fthinlto-index=%t.bc.thinlto.bc -save-temps=obj +; RUN: llvm-dis -opaque-pointers %t.s.3.import.bc -o - | FileCheck --check-prefix=IMPORT %s ; IMPORT: @baz.llvm.{{.*}} = internal global i32 10 ; IMPORT: @baz.llvm.{{.*}} = internal global i32 10 diff --git a/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp index 766591f510937..ea78a11f451ca 100644 --- a/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp +++ b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp @@ -4,11 +4,11 @@ // Ensure that a distributed backend invocation of ThinLTO lowers the type test // as expected. -// RUN: %clang_cc1 -flto=thin -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm-bc -o %t.o %s -// RUN: llvm-dis %t.o -o - | FileCheck --check-prefix=TT %s -// RUN: llvm-lto -thinlto -o %t2 %t.o -// RUN: %clang -target x86_64-unknown-linux -O2 -o %t3.o -x ir %t.o -c -fthinlto-index=%t2.thinlto.bc -save-temps=obj -// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// RUN: %clang_cc1 -opaque-pointers -flto=thin -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm-bc -o %t.o %s +// RUN: llvm-dis -opaque-pointers %t.o -o - | FileCheck --check-prefix=TT %s +// RUN: llvm-lto -opaque-pointers -thinlto -o %t2 %t.o +// RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux -O2 -o %t3.o -x ir %t.o -c -fthinlto-index=%t2.thinlto.bc -save-temps=obj +// RUN: llvm-dis -opaque-pointers %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s // llvm-nm %t3.o | FileCheck --check-prefix=NM %s // The pre-link bitcode produced by clang should contain a type 
test assume @@ -34,12 +34,12 @@ // compilation pipeline is invoked. If not lowered then LLVM CodeGen may assert. // RUN: touch %t4.thinlto.bc // O2 new PM -// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj -// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj +// RUN: llvm-dis -opaque-pointers %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s // llvm-nm %t4.o | FileCheck --check-prefix=NM %s // O0 new PM -// RUN: %clang -target x86_64-unknown-linux -O0 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj -// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux -O0 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj +// RUN: llvm-dis -opaque-pointers %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s // llvm-nm %t4.o | FileCheck --check-prefix=NM %s struct A { diff --git a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll index 96d8f3157b996..844c043f9588f 100644 --- a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll +++ b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll @@ -1,21 +1,21 @@ ; Test of comdat handling with mixed thinlto and regular lto compilation. ; This module is compiled with ThinLTO -; RUN: opt -module-summary -o %t1.o %s +; RUN: opt -opaque-pointers -module-summary -o %t1.o %s ; Input module compiled for regular LTO -; RUN: opt -o %t2.o %p/Inputs/comdat-mixed-lto.ll +; RUN: opt -opaque-pointers -o %t2.o %p/Inputs/comdat-mixed-lto.ll ; The copy of C from this module is prevailing. The copy of C from the ; regular LTO module is not prevailing, and will be dropped to ; available_externally. 
-; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lxp -r=%t2.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps +; RUN: llvm-lto2 run -opaque-pointers -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lxp -r=%t2.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps ; The Input module (regular LTO) is %t3.0. Check to make sure that we removed ; __cxx_global_var_init and testglobfunc from comdat. Also check to ensure ; that testglobfunc was dropped to available_externally. Otherwise we would ; have linker multiply defined errors as it is no longer in a comdat and ; would clash with the copy from this module. -; RUN: llvm-dis %t3.0.0.preopt.bc -o - | FileCheck %s +; RUN: llvm-dis -opaque-pointers %t3.0.0.preopt.bc -o - | FileCheck %s ; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @C }] ; CHECK: @C = available_externally dso_local global %"class.Test::ptr" zeroinitializer, align 4 @@ -24,8 +24,8 @@ ; CHECK-NOT: declare ; Check the behavior with the prevailing testglobfunc in %t2.o. 
-; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lx -r=%t2.o,testglobfunc,plx -o %t4 %t1.o %t2.o -save-temps -; RUN: llvm-dis %t4.0.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK2 +; RUN: llvm-lto2 run -opaque-pointers -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lx -r=%t2.o,testglobfunc,plx -o %t4 %t1.o %t2.o -save-temps +; RUN: llvm-dis -opaque-pointers %t4.0.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK2 ; CHECK2: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @C }] ; CHECK2: @C = available_externally dso_local global %"class.Test::ptr" zeroinitializer, align 4 diff --git a/llvm/test/ThinLTO/X86/constructor-alias.ll b/llvm/test/ThinLTO/X86/constructor-alias.ll index 212ff7c425725..3b8db6eb1c81d 100644 --- a/llvm/test/ThinLTO/X86/constructor-alias.ll +++ b/llvm/test/ThinLTO/X86/constructor-alias.ll @@ -7,11 +7,11 @@ ;; ;; clang -c -fpic -O1 -flto=thin a.cc && cp a.o b.o && ld.lld -shared a.o b.so -; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -opaque-pointers -module-summary %s -o %t1.bc ; RUN: cp %t1.bc %t2.bc -; RUN: llvm-lto2 run %t1.bc %t2.bc -r=%t1.bc,_ZTV1A,pl -r=%t1.bc,_ZN1AD0Ev,pl -r=%t1.bc,_ZN1AD1Ev,pl -r=%t1.bc,_ZN1AD2Ev,pl -r=%t1.bc,D1_a,pl -r=%t1.bc,D1_a_a,pl \ +; RUN: llvm-lto2 run -opaque-pointers %t1.bc %t2.bc -r=%t1.bc,_ZTV1A,pl -r=%t1.bc,_ZN1AD0Ev,pl -r=%t1.bc,_ZN1AD1Ev,pl -r=%t1.bc,_ZN1AD2Ev,pl -r=%t1.bc,D1_a,pl -r=%t1.bc,D1_a_a,pl \ ; RUN: -r=%t2.bc,_ZTV1A,l -r=%t2.bc,_ZN1AD0Ev,l -r=%t2.bc,_ZN1AD1Ev,l -r=%t2.bc,_ZN1AD2Ev,l -r=%t2.bc,D1_a,l -r=%t2.bc,D1_a_a,l -o %t3 --save-temps -; RUN: llvm-dis < %t3.2.1.promote.bc | FileCheck %s +; RUN: llvm-dis -opaque-pointers < %t3.2.1.promote.bc | FileCheck %s ; CHECK: @_ZTV1A = available_externally dso_local unnamed_addr constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN1AD1Ev, ptr @_ZN1AD0Ev] } ; CHECK: @D1_a = available_externally dso_local unnamed_addr alias void 
(ptr), ptr @_ZN1AD1Ev From 35084007321fcecbca85676721efb6a43245e302 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Tue, 8 Nov 2022 09:33:33 -0500 Subject: [PATCH 512/516] [SYCL] Fix generation of intel_fpga_mem builtin --- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp | 2 +- clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp | 2 +- clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6f85d715f220e..f4d126fd7134e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21467,7 +21467,7 @@ RValue CodeGenFunction::EmitIntelFPGAMemBuiltin(const CallExpr *E) { llvm::Value *Ann = EmitAnnotationCall(F, PtrVal, AnnotStr, SourceLocation()); - cast(Ann)->addFnAttr(llvm::Attribute::ReadNone); + cast(Ann)->setDoesNotAccessMemory(); return RValue::get(Ann); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp b/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp index de982c25b7e10..0a624f71be8a5 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp @@ -97,7 +97,7 @@ void foo(float *A, int *B, State *C, State &D) { z = __builtin_intel_fpga_mem(&D, PARAM_1 | PARAM_2, 128, 4, TestVal1, TestVal2); } -// CHECK-DAG: attributes [[ATT]] = { readnone } +// CHECK-DAG: attributes [[ATT]] = { memory(none) } template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp b/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp index 958bd1389ff52..5a3b3d7be9808 100644 --- a/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp +++ b/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp @@ -97,7 +97,7 @@ void foo(float *A, int *B, State *C, State 
&D) { z = __builtin_intel_fpga_mem(&D, PARAM_1 | PARAM_2, 128, 4, TestVal1, TestVal2); } -// CHECK-DAG: attributes [[ATT]] = { readnone } +// CHECK-DAG: attributes [[ATT]] = { memory(none) } template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp b/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp index 62352886b249e..095c875e32e0a 100644 --- a/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp @@ -67,7 +67,7 @@ void foo(float *A, int *B, State *C, State &D) { f = __builtin_intel_fpga_mem(&F, PARAM_1 | PARAM_2, 127); } -// CHECK-DAG: attributes [[ATT]] = { readnone } +// CHECK-DAG: attributes [[ATT]] = { memory(none) } template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { From d9d8669cc2974df4935c5e433efeefbaf3d46e12 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Tue, 8 Nov 2022 11:55:31 -0500 Subject: [PATCH 513/516] Touch yml file to run post commit checks --- .github/workflows/sycl_post_commit.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/sycl_post_commit.yml b/.github/workflows/sycl_post_commit.yml index 7c885344b0a48..2f73c05da702e 100644 --- a/.github/workflows/sycl_post_commit.yml +++ b/.github/workflows/sycl_post_commit.yml @@ -15,6 +15,7 @@ on: - .github/workflows/sycl_macos_build_and_test.yml workflow_dispatch: + jobs: # This job generates matrix of tests for LLVM Test Suite test_matrix: From 2629cbb4896918817e6a0fa76757a138fd369cb8 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Thu, 10 Nov 2022 05:31:41 -0500 Subject: [PATCH 514/516] [ESIMD] Disable tests that failed due new "readnone" attribute policy --- llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll | 5 +++++ llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll | 3 +++ 2 files changed, 8 insertions(+) diff --git 
a/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll index 438900c72d041..1176187969e2c 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll @@ -4,6 +4,11 @@ ; This test checks that debug info is preserved during lowering ; ESIMD specific constructs. +; Disable test until GenXIntrinsics is updated to reflect recent community +; changes; +; XFAIL:* + + @__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 declare spir_func <16 x float> @_Z26__esimd_oword_ld_unalignedIfLi16EjLi0EEN2cl4sycl3ext5intel12experimental5esimd6detail11vector_typeIT_XT0_EE4typeET1_j(i32, i32) diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll index 1dc43d2696812..d6141245c4ec1 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll @@ -9,6 +9,9 @@ ; not practical in this case. ; ; All new test cases should be added to intrins_trans.cpp +; Disable test until GenXIntrinsics is updated to reflect recent community +; changes; +; XFAIL:* target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" From f4b39a59577733c133306ceefcd944c31b93dd0e Mon Sep 17 00:00:00 2001 From: Chang Lin Date: Wed, 9 Nov 2022 09:35:47 -0800 Subject: [PATCH 515/516] Patch bug in 9a45e4bee. Prevent SSA breaks when lifetime intrinsics are moved. 
--- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 8 ++++ .../MemCpyOpt/intel-lifetime-move.ll | 44 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 llvm/test/Transforms/MemCpyOpt/intel-lifetime-move.ll diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 43259cb42da18..2040b032d0cc4 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -931,6 +931,14 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, return false; } + // Code below tries to move the lifetime marker before "C". Check the + // correctness of this motion. + if (SkippedLifetimeStart && SkippedLifetimeStart->getNumOperands() == 3) { + auto *LiveI = cast(SkippedLifetimeStart->getOperand(1)); + if (!DT->dominates(LiveI, C)) + return false; + } + // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. diff --git a/llvm/test/Transforms/MemCpyOpt/intel-lifetime-move.ll b/llvm/test/Transforms/MemCpyOpt/intel-lifetime-move.ll new file mode 100644 index 0000000000000..b27983fe48583 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/intel-lifetime-move.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes="memcpyopt" -S %s | FileCheck %s + +; memcpyopt lifetime skipping, is moving the lifetime.start before the def +; of %tm3. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.foo = type { ptr, ptr, i64, i64 } + +declare dso_local i32 @pluto(...) 
+ +define dso_local void @wombat(ptr %meow) local_unnamed_addr { +; CHECK-LABEL: @wombat( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TM:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 +; CHECK-NEXT: [[TM1:%.*]] = alloca [[STRUCT_FOO]], align 8 +; CHECK-NEXT: [[TM2:%.*]] = bitcast ptr [[TM1]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) [[TM2]], ptr noundef nonnull align 8 dereferenceable(32) [[MEOW:%.*]], i64 32, i1 false) +; CHECK-NEXT: [[TM3:%.*]] = bitcast ptr [[TM]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[TM3]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TM3]], ptr align 8 [[MEOW]], i64 32, i1 false) +; CHECK-NEXT: ret void +; +bb: + %tm = alloca %struct.foo, align 8 + %tm1 = alloca %struct.foo, align 8 + %tm2 = bitcast ptr %tm1 to ptr + call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %tm2, ptr noundef nonnull align 8 dereferenceable(32) %meow, i64 32, i1 false) + %tm3 = bitcast ptr %tm to ptr + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %tm3) + call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %tm3, ptr noundef nonnull align 8 dereferenceable(32) %tm2, i64 32, i1 false) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2 + +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } From 6aeabf8eb8c41587f8d5975855b7fc8eb274816d Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Thu, 10 Nov 2022 11:37:02 -0500 Subject: [PATCH 516/516] Revert 
"Touch yml file to run post commit checks" This reverts commit d9d8669cc2974df4935c5e433efeefbaf3d46e12. --- .github/workflows/sycl_post_commit.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/sycl_post_commit.yml b/.github/workflows/sycl_post_commit.yml index 2f73c05da702e..7c885344b0a48 100644 --- a/.github/workflows/sycl_post_commit.yml +++ b/.github/workflows/sycl_post_commit.yml @@ -15,7 +15,6 @@ on: - .github/workflows/sycl_macos_build_and_test.yml workflow_dispatch: - jobs: # This job generates matrix of tests for LLVM Test Suite test_matrix: